Merge tag 'powerpc-4.13-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux Pull powerpc fixes from Michael Ellerman: "Nothing that really stands out, just a bunch of fixes that have come in in the last couple of weeks. None of these are actually fixes for code that is new in 4.13. It's roughly half older bugs, with fixes going to stable, and half fixes/updates for Power9. Thanks to: Aneesh Kumar K.V, Anton Blanchard, Balbir Singh, Benjamin Herrenschmidt, Madhavan Srinivasan, Michael Neuling, Nicholas Piggin, Oliver O'Halloran" * tag 'powerpc-4.13-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: powerpc/64: Fix atomic64_inc_not_zero() to return an int powerpc: Fix emulation of mfocrf in emulate_step() powerpc: Fix emulation of mcrf in emulate_step() powerpc/perf: Add POWER9 alternate PM_RUN_CYC and PM_RUN_INST_CMPL events powerpc/perf: Fix SDAR_MODE value for continous sampling on Power9 powerpc/asm: Mark cr0 as clobbered in mftb() powerpc/powernv: Fix local TLB flush for boot and MCE on POWER9 powerpc/mm/radix: Synchronize updates to the process table powerpc/mm/radix: Properly clear process table entry powerpc/powernv: Tell OPAL about our MMU mode on POWER9 powerpc/kexec: Fix radix to hash kexec due to IAMR/AMOR

commit: deed9deb62f9121ea16f06351d1e968685a74863 [log] [tgz]
author: Linus Torvalds <torvalds@linux-foundation.org> Fri Jul 14 15:33:15 2017 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> Fri Jul 14 15:33:15 2017 -0700
tree: 124b1282c78c4aba97f53694ba0bc0489c014a8c
parent: ccd5d1b91f22351b55feb6fdee504cb84d97752f [diff]
parent: 01e6a61aceb82e13bec29502a8eb70d9574f97ad [diff]
diff --git a/Documentation/ABI/testing/sysfs-class-mtd b/Documentation/ABI/testing/sysfs-class-mtd
index 3b5c3bc..f34e5923 100644
--- a/Documentation/ABI/testing/sysfs-class-mtd
+++ b/Documentation/ABI/testing/sysfs-class-mtd

@@ -229,6 +229,6 @@
 Contact:	linux-mtd@lists.infradead.org
 Description:
 		For a partition, the offset of that partition from the start
-		of the master device in bytes. This attribute is absent on
-		main devices, so it can be used to distinguish between
-		partitions and devices that aren't partitions.
+		of the parent (another partition or a flash device) in bytes.
+		This attribute is absent on flash devices, so it can be used
+		to distinguish them from partitions.

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index a809f60..84c606f 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs

@@ -75,7 +75,7 @@
 Description:
 		 Controls the memory footprint used by f2fs.
 
-What:		/sys/fs/f2fs/<disk>/trim_sections
+What:		/sys/fs/f2fs/<disk>/batched_trim_sections
 Date:		February 2015
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
 Description:
@@ -112,3 +112,21 @@
 Contact:	"Shuoran Liu" <liushuoran@huawei.com>
 Description:
 		 Shows total written kbytes issued to disk.
+
+What:		/sys/fs/f2fs/<disk>/inject_rate
+Date:		May 2016
+Contact:	"Sheng Yong" <shengyong1@huawei.com>
+Description:
+		 Controls the injection rate.
+
+What:		/sys/fs/f2fs/<disk>/inject_type
+Date:		May 2016
+Contact:	"Sheng Yong" <shengyong1@huawei.com>
+Description:
+		 Controls the injection type.
+
+What:		/sys/fs/f2fs/<disk>/reserved_blocks
+Date:		June 2017
+Contact:	"Chao Yu" <yuchao0@huawei.com>
+Description:
+		 Controls current reserved blocks in system.

diff --git a/Documentation/DMA-ISA-LPC.txt b/Documentation/DMA-ISA-LPC.txt
index c413313..7a065ac 100644
--- a/Documentation/DMA-ISA-LPC.txt
+++ b/Documentation/DMA-ISA-LPC.txt

@@ -42,7 +42,7 @@
 
 Unfortunately the memory available for ISA DMA is scarce so unless you
 allocate the memory during boot-up it's a good idea to also pass
-__GFP_REPEAT and __GFP_NOWARN to make the allocator try a bit harder.
+__GFP_RETRY_MAYFAIL and __GFP_NOWARN to make the allocator try a bit harder.
 
 (This scarcity also means that you should allocate the buffer as
 early as possible and not release it until the driver is unloaded.)

diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index f5f93dc..66e8ce1 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt

@@ -61,12 +61,15 @@
 | Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144        |
 | Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154        |
 | Cavium         | ThunderX Core   | #27456          | CAVIUM_ERRATUM_27456        |
-| Cavium         | ThunderX SMMUv2 | #27704          | N/A                         |
 | Cavium         | ThunderX Core   | #30115          | CAVIUM_ERRATUM_30115        |
+| Cavium         | ThunderX SMMUv2 | #27704          | N/A                         |
+| Cavium         | ThunderX2 SMMUv3| #74             | N/A                         |
+| Cavium         | ThunderX2 SMMUv3| #126            | N/A                         |
 |                |                 |                 |                             |
 | Freescale/NXP  | LS2080A/LS1043A | A-008585        | FSL_ERRATUM_A008585         |
 |                |                 |                 |                             |
 | Hisilicon      | Hip0{5,6,7}     | #161010101      | HISILICON_ERRATUM_161010101 |
+| Hisilicon      | Hip0{6,7}       | #161010701      | N/A                         |
 |                |                 |                 |                             |
 | Qualcomm Tech. | Falkor v1       | E1003           | QCOM_FALKOR_ERRATUM_1003    |
 | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |

diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt
index f56ec97..934c44e 100644
--- a/Documentation/block/data-integrity.txt
+++ b/Documentation/block/data-integrity.txt

@@ -192,7 +192,7 @@
     supported by the block device.
 
 
-    int bio_integrity_prep(bio);
+    bool bio_integrity_prep(bio);
 
       To generate IMD for WRITE and to set up buffers for READ, the
       filesystem must call bio_integrity_prep(bio).
@@ -201,9 +201,7 @@
       sector must be set, and the bio should have all data pages
       added.  It is up to the caller to ensure that the bio does not
       change while I/O is in progress.
-
-      bio_integrity_prep() should only be called if
-      bio_integrity_enabled() returned 1.
+      Complete bio with error if prepare failed for some reason.
 
 
 5.3 PASSING EXISTING INTEGRITY METADATA

diff --git a/Documentation/cgroup-v1/memory.txt b/Documentation/cgroup-v1/memory.txt
index 946e691..cefb636 100644
--- a/Documentation/cgroup-v1/memory.txt
+++ b/Documentation/cgroup-v1/memory.txt

@@ -789,23 +789,46 @@
 system. It might be too late to consult with vmstat or any other
 statistics, so it's advisable to take an immediate action.
 
-The events are propagated upward until the event is handled, i.e. the
-events are not pass-through. Here is what this means: for example you have
-three cgroups: A->B->C. Now you set up an event listener on cgroups A, B
-and C, and suppose group C experiences some pressure. In this situation,
-only group C will receive the notification, i.e. groups A and B will not
-receive it. This is done to avoid excessive "broadcasting" of messages,
-which disturbs the system and which is especially bad if we are low on
-memory or thrashing. So, organize the cgroups wisely, or propagate the
-events manually (or, ask us to implement the pass-through events,
-explaining why would you need them.)
+By default, events are propagated upward until the event is handled, i.e. the
+events are not pass-through. For example, you have three cgroups: A->B->C. Now
+you set up an event listener on cgroups A, B and C, and suppose group C
+experiences some pressure. In this situation, only group C will receive the
+notification, i.e. groups A and B will not receive it. This is done to avoid
+excessive "broadcasting" of messages, which disturbs the system and which is
+especially bad if we are low on memory or thrashing. Group B will receive
+notification only if there are no event listeners for group C.
+
+There are three optional modes that specify different propagation behavior:
+
+ - "default": this is the default behavior specified above. This mode is the
+   same as omitting the optional mode parameter, preserved for backwards
+   compatibility.
+
+ - "hierarchy": events always propagate up to the root, similar to the default
+   behavior, except that propagation continues regardless of whether there are
+   event listeners at each level, with the "hierarchy" mode. In the above
+   example, groups A, B, and C will receive notification of memory pressure.
+
+ - "local": events are pass-through, i.e. they only receive notifications when
+   memory pressure is experienced in the memcg for which the notification is
+   registered. In the above example, group C will receive notification if
+   registered for "local" notification and the group experiences memory
+   pressure. However, group B will never receive notification, regardless if
+   there is an event listener for group C or not, if group B is registered for
+   local notification.
+
+The level and event notification mode ("hierarchy" or "local", if necessary) are
+specified by a comma-delimited string, i.e. "low,hierarchy" specifies
+hierarchical, pass-through, notification for all ancestor memcgs. Notification
+that is the default, non pass-through behavior, does not specify a mode.
+"medium,local" specifies pass-through notification for the medium level.
 
 The file memory.pressure_level is only used to setup an eventfd. To
 register a notification, an application must:
 
 - create an eventfd using eventfd(2);
 - open memory.pressure_level;
-- write string like "<event_fd> <fd of memory.pressure_level> <level>"
+- write string as "<event_fd> <fd of memory.pressure_level> <level[,mode]>"
   to cgroup.event_control.
 
 Application will be notified through eventfd when memory pressure is at
@@ -821,7 +844,7 @@
    # cd /sys/fs/cgroup/memory/
    # mkdir foo
    # cd foo
-   # cgroup_event_listener memory.pressure_level low &
+   # cgroup_event_listener memory.pressure_level low,hierarchy &
    # echo 8000000 > memory.limit_in_bytes
    # echo 8000000 > memory.memsw.limit_in_bytes
    # echo $$ > tasks

diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 9ec8488..17b0091 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst

@@ -114,7 +114,7 @@
 User Space Memory Access
 ------------------------
 
-.. kernel-doc:: arch/x86/include/asm/uaccess_32.h
+.. kernel-doc:: arch/x86/include/asm/uaccess.h
    :internal:
 
 .. kernel-doc:: arch/x86/lib/usercopy_32.c

diff --git a/Documentation/crypto/asymmetric-keys.txt b/Documentation/crypto/asymmetric-keys.txt
index b82b6ad..5969bf4 100644
--- a/Documentation/crypto/asymmetric-keys.txt
+++ b/Documentation/crypto/asymmetric-keys.txt

@@ -10,6 +10,7 @@
     - Signature verification.
   - Asymmetric key subtypes.
   - Instantiation data parsers.
+  - Keyring link restrictions.
 
 
 ========
@@ -318,7 +319,8 @@
 =========================
 
 Keyrings created from userspace using add_key can be configured to check the
-signature of the key being linked.
+signature of the key being linked.  Keys without a valid signature are not
+allowed to link.
 
 Several restriction methods are available:
 
@@ -327,9 +329,10 @@
      - Option string used with KEYCTL_RESTRICT_KEYRING:
        - "builtin_trusted"
 
-     The kernel builtin trusted keyring will be searched for the signing
-     key. The ca_keys kernel parameter also affects which keys are used for
-     signature verification.
+     The kernel builtin trusted keyring will be searched for the signing key.
+     If the builtin trusted keyring is not configured, all links will be
+     rejected.  The ca_keys kernel parameter also affects which keys are used
+     for signature verification.
 
  (2) Restrict using the kernel builtin and secondary trusted keyrings
 
@@ -337,8 +340,10 @@
        - "builtin_and_secondary_trusted"
 
      The kernel builtin and secondary trusted keyrings will be searched for the
-     signing key. The ca_keys kernel parameter also affects which keys are used
-     for signature verification.
+     signing key.  If the secondary trusted keyring is not configured, this
+     restriction will behave like the "builtin_trusted" option.  The ca_keys
+     kernel parameter also affects which keys are used for signature
+     verification.
 
  (3) Restrict using a separate key or keyring
 
@@ -346,7 +351,7 @@
        - "key_or_keyring:<key or keyring serial number>[:chain]"
 
      Whenever a key link is requested, the link will only succeed if the key
-     being linked is signed by one of the designated keys. This key may be
+     being linked is signed by one of the designated keys.  This key may be
      specified directly by providing a serial number for one asymmetric key, or
      a group of keys may be searched for the signing key by providing the
      serial number for a keyring.
@@ -354,7 +359,51 @@
      When the "chain" option is provided at the end of the string, the keys
      within the destination keyring will also be searched for signing keys.
      This allows for verification of certificate chains by adding each
-     cert in order (starting closest to the root) to one keyring.
+     certificate in order (starting closest to the root) to a keyring.  For
+     instance, one keyring can be populated with links to a set of root
+     certificates, with a separate, restricted keyring set up for each
+     certificate chain to be validated:
+
+	# Create and populate a keyring for root certificates
+	root_id=`keyctl add keyring root-certs "" @s`
+	keyctl padd asymmetric "" $root_id < root1.cert
+	keyctl padd asymmetric "" $root_id < root2.cert
+
+	# Create and restrict a keyring for the certificate chain
+	chain_id=`keyctl add keyring chain "" @s`
+	keyctl restrict_keyring $chain_id asymmetric key_or_keyring:$root_id:chain
+
+	# Attempt to add each certificate in the chain, starting with the
+	# certificate closest to the root.
+	keyctl padd asymmetric "" $chain_id < intermediateA.cert
+	keyctl padd asymmetric "" $chain_id < intermediateB.cert
+	keyctl padd asymmetric "" $chain_id < end-entity.cert
+
+     If the final end-entity certificate is successfully added to the "chain"
+     keyring, we can be certain that it has a valid signing chain going back to
+     one of the root certificates.
+
+     A single keyring can be used to verify a chain of signatures by
+     restricting the keyring after linking the root certificate:
+
+	# Create a keyring for the certificate chain and add the root
+	chain2_id=`keyctl add keyring chain2 "" @s`
+	keyctl padd asymmetric "" $chain2_id < root1.cert
+
+	# Restrict the keyring that already has root1.cert linked.  The cert
+	# will remain linked by the keyring.
+	keyctl restrict_keyring $chain2_id asymmetric key_or_keyring:0:chain
+
+	# Attempt to add each certificate in the chain, starting with the
+	# certificate closest to the root.
+	keyctl padd asymmetric "" $chain2_id < intermediateA.cert
+	keyctl padd asymmetric "" $chain2_id < intermediateB.cert
+	keyctl padd asymmetric "" $chain2_id < end-entity.cert
+
+     If the final end-entity certificate is successfully added to the "chain2"
+     keyring, we can be certain that there is a valid signing chain going back
+     to the root certificate that was added before the keyring was restricted.
+
 
 In all of these cases, if the signing key is found the signature of the key to
 be linked will be verified using the signing key.  The requested key is added

diff --git a/Documentation/devicetree/bindings/i2c/i2c-aspeed.txt b/Documentation/devicetree/bindings/i2c/i2c-aspeed.txt
new file mode 100644
index 0000000..bd6480b
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-aspeed.txt

@@ -0,0 +1,48 @@
+Device tree configuration for the I2C busses on the AST24XX and AST25XX SoCs.
+
+Required Properties:
+- #address-cells	: should be 1
+- #size-cells		: should be 0
+- reg			: address offset and range of bus
+- compatible		: should be "aspeed,ast2400-i2c-bus"
+			  or "aspeed,ast2500-i2c-bus"
+- clocks		: root clock of bus, should reference the APB
+			  clock
+- interrupts		: interrupt number
+- interrupt-parent	: interrupt controller for bus, should reference an
+			  aspeed,ast2400-i2c-ic or aspeed,ast2500-i2c-ic
+			  interrupt controller
+
+Optional Properties:
+- bus-frequency	: frequency of the bus clock in Hz; defaults to 100 kHz when not
+		  specified
+- multi-master	: states that there is another master active on this bus.
+
+Example:
+
+i2c {
+	compatible = "simple-bus";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	ranges = <0 0x1e78a000 0x1000>;
+
+	i2c_ic: interrupt-controller@0 {
+		#interrupt-cells = <1>;
+		compatible = "aspeed,ast2400-i2c-ic";
+		reg = <0x0 0x40>;
+		interrupts = <12>;
+		interrupt-controller;
+	};
+
+	i2c0: i2c-bus@40 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		#interrupt-cells = <1>;
+		reg = <0x40 0x40>;
+		compatible = "aspeed,ast2400-i2c-bus";
+		clocks = <&clk_apb>;
+		bus-frequency = <100000>;
+		interrupts = <0>;
+		interrupt-parent = <&i2c_ic>;
+	};
+};

diff --git a/Documentation/devicetree/bindings/i2c/i2c-designware.txt b/Documentation/devicetree/bindings/i2c/i2c-designware.txt
index fee26dc..fbb0a6d8 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-designware.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-designware.txt

@@ -20,7 +20,7 @@
  - i2c-sda-falling-time-ns : should contain the SDA falling time in nanoseconds.
    This value which is by default 300ns is used to compute the tHIGH period.
 
-Example :
+Examples :
 
 	i2c@f0000 {
 		#address-cells = <1>;
@@ -43,3 +43,17 @@
 		i2c-sda-falling-time-ns = <300>;
 		i2c-scl-falling-time-ns = <300>;
 	};
+
+	i2c@1120000 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0x2000 0x100>;
+		clock-frequency = <400000>;
+		clocks = <&i2cclk>;
+		interrupts = <0>;
+
+		eeprom@64 {
+			compatible = "linux,slave-24c02";
+			reg = <0x40000064>;
+		};
+	};

diff --git a/Documentation/devicetree/bindings/i2c/i2c-pca-platform.txt b/Documentation/devicetree/bindings/i2c/i2c-pca-platform.txt
new file mode 100644
index 0000000..f1f3876
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-pca-platform.txt

@@ -0,0 +1,29 @@
+* NXP PCA PCA9564/PCA9665 I2C controller
+
+The PCA9564/PCA9665 serves as an interface between most standard
+parallel-bus microcontrollers/microprocessors and the serial I2C-bus
+and allows the parallel bus system to communicate bi-directionally
+with the I2C-bus.
+
+Required properties :
+
+ - reg : Offset and length of the register set for the device
+ - compatible : one of "nxp,pca9564" or "nxp,pca9665"
+
+Optional properties
+ - interrupts : the interrupt number
+ - interrupt-parent : the phandle for the interrupt controller.
+   If an interrupt is not specified polling will be used.
+ - reset-gpios : gpio specifier for gpio connected to RESET_N pin. As the line
+   is active low, it should be marked GPIO_ACTIVE_LOW.
+ - clock-frequency : I2C bus frequency.
+
+Example:
+	i2c0: i2c@80000 {
+		compatible = "nxp,pca9564";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0x80000 0x4>;
+		reset-gpios = <&gpio1 0 GPIO_ACTIVE_LOW>;
+		clock-frequency = <100000>;
+	};

diff --git a/Documentation/devicetree/bindings/i2c/i2c-zx2967.txt b/Documentation/devicetree/bindings/i2c/i2c-zx2967.txt
new file mode 100644
index 0000000..cb806d1
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-zx2967.txt

@@ -0,0 +1,22 @@
+ZTE zx2967 I2C controller
+
+Required properties:
+ - compatible: must be "zte,zx296718-i2c"
+ - reg: physical address and length of the device registers
+ - interrupts: a single interrupt specifier
+ - clocks: clock for the device
+ - #address-cells: should be <1>
+ - #size-cells: should be <0>
+ - clock-frequency: the desired I2C bus clock frequency.
+
+Examples:
+
+	i2c@112000 {
+		compatible = "zte,zx296718-i2c";
+		reg = <0x00112000 0x1000>;
+		interrupts = <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&osc24m>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		clock-frequency = <1600000>;
+	};

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
index be57550..c9abbf3 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt

@@ -26,6 +26,12 @@
                       * "priq"      - PRI Queue not empty
                       * "cmdq-sync" - CMD_SYNC complete
                       * "gerror"    - Global Error activated
+                      * "combined"  - The combined interrupt is optional,
+				      and should only be provided if the
+				      hardware supports just a single,
+				      combined interrupt line.
+				      If provided, then the combined interrupt
+				      will be used in preference to any others.
 
 - #iommu-cells      : See the generic IOMMU binding described in
                         devicetree/bindings/pci/pci-iommu.txt
@@ -49,6 +55,12 @@
 - hisilicon,broken-prefetch-cmd
                     : Avoid sending CMD_PREFETCH_* commands to the SMMU.
 
+- cavium,cn9900-broken-page1-regspace
+                    : Replaces all page 1 offsets used for EVTQ_PROD/CONS,
+		      PRIQ_PROD/CONS register access with page 0 offsets.
+		      Set for Cavium ThunderX2 silicon that doesn't support
+		      SMMU page1 register space.
+
 ** Example
 
         smmu@2b400000 {

diff --git a/Documentation/devicetree/bindings/mtd/denali-nand.txt b/Documentation/devicetree/bindings/mtd/denali-nand.txt
index e593bbe..504291d 100644
--- a/Documentation/devicetree/bindings/mtd/denali-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/denali-nand.txt

@@ -3,10 +3,23 @@
 Required properties:
   - compatible : should be one of the following:
       "altr,socfpga-denali-nand"            - for Altera SOCFPGA
+      "socionext,uniphier-denali-nand-v5a"  - for Socionext UniPhier (v5a)
+      "socionext,uniphier-denali-nand-v5b"  - for Socionext UniPhier (v5b)
   - reg : should contain registers location and length for data and reg.
   - reg-names: Should contain the reg names "nand_data" and "denali_reg"
   - interrupts : The interrupt number.
 
+Optional properties:
+  - nand-ecc-step-size: see nand.txt for details.  If present, the value must be
+      512        for "altr,socfpga-denali-nand"
+      1024       for "socionext,uniphier-denali-nand-v5a"
+      1024       for "socionext,uniphier-denali-nand-v5b"
+  - nand-ecc-strength: see nand.txt for details.  Valid values are:
+      8, 15      for "altr,socfpga-denali-nand"
+      8, 16, 24  for "socionext,uniphier-denali-nand-v5a"
+      8, 16      for "socionext,uniphier-denali-nand-v5b"
+  - nand-ecc-maximize: see nand.txt for details
+
 The device tree may optionally contain sub-nodes describing partitions of the
 address space. See partition.txt for more detail.
 

diff --git a/Documentation/devicetree/bindings/mtd/elm.txt b/Documentation/devicetree/bindings/mtd/elm.txt
index 8c1528c..59ddc61 100644
--- a/Documentation/devicetree/bindings/mtd/elm.txt
+++ b/Documentation/devicetree/bindings/mtd/elm.txt

@@ -1,7 +1,7 @@
 Error location module
 
 Required properties:
-- compatible: Must be "ti,am33xx-elm"
+- compatible: Must be "ti,am3352-elm"
 - reg: physical base address and size of the registers map.
 - interrupts: Interrupt number for the elm.
 

diff --git a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
index 174f68c..dd55904 100644
--- a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt

@@ -5,7 +5,7 @@
 
 All timing relevant properties as well as generic gpmc child properties are
 explained in a separate documents - please refer to
-Documentation/devicetree/bindings/bus/ti-gpmc.txt
+Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt
 
 For NAND specific properties such as ECC modes or bus width, please refer to
 Documentation/devicetree/bindings/mtd/nand.txt

diff --git a/Documentation/devicetree/bindings/mtd/gpmc-nor.txt b/Documentation/devicetree/bindings/mtd/gpmc-nor.txt
index 4828c17..131d3a74 100644
--- a/Documentation/devicetree/bindings/mtd/gpmc-nor.txt
+++ b/Documentation/devicetree/bindings/mtd/gpmc-nor.txt

@@ -5,7 +5,7 @@
 
 All timing relevant properties as well as generic GPMC child properties are
 explained in a separate documents. Please refer to
-Documentation/devicetree/bindings/bus/ti-gpmc.txt
+Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt
 
 Required properties:
 - bank-width: 		Width of NOR flash in bytes. GPMC supports 8-bit and
@@ -28,7 +28,7 @@
 
 Optional properties:
 - gpmc,XXX		Additional GPMC timings and settings parameters. See
-			Documentation/devicetree/bindings/bus/ti-gpmc.txt
+			Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt
 
 Optional properties for partition table parsing:
 - #address-cells: should be set to 1

diff --git a/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt b/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt
index 5d8fa52..b6e8bfd 100644
--- a/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt
+++ b/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt

@@ -5,7 +5,7 @@
 
 All timing relevant properties as well as generic gpmc child properties are
 explained in a separate documents - please refer to
-Documentation/devicetree/bindings/bus/ti-gpmc.txt
+Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt
 
 Required properties:
 

diff --git a/Documentation/devicetree/bindings/mtd/gpmi-nand.txt b/Documentation/devicetree/bindings/mtd/gpmi-nand.txt
index d02acaf..b289ef3 100644
--- a/Documentation/devicetree/bindings/mtd/gpmi-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/gpmi-nand.txt

@@ -4,7 +4,12 @@
 NAND flash chips.
 
 Required properties:
-  - compatible : should be "fsl,<chip>-gpmi-nand"
+  - compatible : should be "fsl,<chip>-gpmi-nand", chip can be:
+    * imx23
+    * imx28
+    * imx6q
+    * imx6sx
+    * imx7d
   - reg : should contain registers location and length for gpmi and bch.
   - reg-names: Should contain the reg names "gpmi-nand" and "bch"
   - interrupts : BCH interrupt number.
@@ -13,6 +18,13 @@
     and GPMI DMA channel ID.
     Refer to dma.txt and fsl-mxs-dma.txt for details.
   - dma-names: Must be "rx-tx".
+  - clocks : clocks phandle and clock specifier corresponding to each clock
+    specified in clock-names.
+  - clock-names : The "gpmi_io" clock is always required. Which clocks are
+    exactly required depends on chip:
+    * imx23/imx28 : "gpmi_io"
+    * imx6q/sx : "gpmi_io", "gpmi_apb", "gpmi_bch", "gpmi_bch_apb", "per1_bch"
+    * imx7d : "gpmi_io", "gpmi_bch_apb"
 
 Optional properties:
   - nand-on-flash-bbt: boolean to enable on flash bbt option if not

diff --git a/Documentation/devicetree/bindings/mtd/microchip,mchp23k256.txt b/Documentation/devicetree/bindings/mtd/microchip,mchp23k256.txt
new file mode 100644
index 0000000..7328eb9
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/microchip,mchp23k256.txt

@@ -0,0 +1,18 @@
+* MTD SPI driver for Microchip 23K256 (and similar) serial SRAM
+
+Required properties:
+- #address-cells, #size-cells : Must be present if the device has sub-nodes
+  representing partitions.
+- compatible : Must be one of "microchip,mchp23k256" or "microchip,mchp23lcv1024"
+- reg : Chip-Select number
+- spi-max-frequency : Maximum frequency of the SPI bus the chip can operate at
+
+Example:
+
+	spi-sram@0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		compatible = "microchip,mchp23k256";
+		reg = <0>;
+		spi-max-frequency = <20000000>;
+	};

diff --git a/Documentation/devicetree/bindings/mtd/mtk-nand.txt b/Documentation/devicetree/bindings/mtd/mtk-nand.txt
index 069c192..dbf9e05 100644
--- a/Documentation/devicetree/bindings/mtd/mtk-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/mtk-nand.txt

@@ -12,7 +12,8 @@
 
 The first part of NFC is NAND Controller Interface (NFI) HW.
 Required NFI properties:
-- compatible:			Should be "mediatek,mtxxxx-nfc".
+- compatible:			Should be one of "mediatek,mt2701-nfc",
+				"mediatek,mt2712-nfc".
 - reg:				Base physical address and size of NFI.
 - interrupts:			Interrupts of NFI.
 - clocks:			NFI required clocks.
@@ -141,7 +142,7 @@
 ==============
 
 Required BCH properties:
-- compatible:	Should be "mediatek,mtxxxx-ecc".
+- compatible:	Should be one of "mediatek,mt2701-ecc", "mediatek,mt2712-ecc".
 - reg:		Base physical address and size of ECC.
 - interrupts:	Interrupts of ECC.
 - clocks:	ECC required clocks.

diff --git a/Documentation/devicetree/bindings/mtd/nand.txt b/Documentation/devicetree/bindings/mtd/nand.txt
index b056016..133f381 100644
--- a/Documentation/devicetree/bindings/mtd/nand.txt
+++ b/Documentation/devicetree/bindings/mtd/nand.txt

@@ -21,7 +21,7 @@
 
 - nand-ecc-mode : String, operation mode of the NAND ecc mode.
 		  Supported values are: "none", "soft", "hw", "hw_syndrome",
-		  "hw_oob_first".
+		  "hw_oob_first", "on-die".
 		  Deprecated values:
 		  "soft_bch": use "soft" and nand-ecc-algo instead
 - nand-ecc-algo: string, algorithm of NAND ECC.

diff --git a/Documentation/devicetree/bindings/mtd/partition.txt b/Documentation/devicetree/bindings/mtd/partition.txt
index 81a224d..36f3b769 100644
--- a/Documentation/devicetree/bindings/mtd/partition.txt
+++ b/Documentation/devicetree/bindings/mtd/partition.txt

@@ -1,29 +1,49 @@
-Representing flash partitions in devicetree
+Flash partitions in device tree
+===============================
 
-Partitions can be represented by sub-nodes of an mtd device. This can be used
+Flash devices can be partitioned into one or more functional ranges (e.g. "boot
+code", "nvram", "kernel").
+
+Different devices may be partitioned in different ways. Some may use a fixed
+flash layout set at production time. Some may use an on-flash table that describes
+the geometry and naming/purpose of each functional region. It is also possible
+to see these methods mixed.
+
+To assist system software in locating partitions, we allow describing which
+method is used for a given flash device. To describe the method there should be
+a subnode of the flash device that is named 'partitions'. It must have a
+'compatible' property, which is used to identify the method to use.
+
+We currently only document a binding for fixed layouts.
+
+
+Fixed Partitions
+================
+
+Partitions can be represented by sub-nodes of a flash device. This can be used
 on platforms which have strong conventions about which portions of a flash are
 used for what purposes, but which don't use an on-flash partition table such
 as RedBoot.
 
-The partition table should be a subnode of the mtd node and should be named
+The partition table should be a subnode of the flash node and should be named
 'partitions'. This node should have the following property:
 - compatible : (required) must be "fixed-partitions"
 Partitions are then defined in subnodes of the partitions node.
 
-For backwards compatibility partitions as direct subnodes of the mtd device are
+For backwards compatibility partitions as direct subnodes of the flash device are
 supported. This use is discouraged.
 NOTE: also for backwards compatibility, direct subnodes that have a compatible
 string are not considered partitions, as they may be used for other bindings.
 
 #address-cells & #size-cells must both be present in the partitions subnode of the
-mtd device. There are two valid values for both:
+flash device. There are two valid values for both:
 <1>: for partitions that require a single 32-bit cell to represent their
      size/address (aka the value is below 4 GiB)
 <2>: for partitions that require two 32-bit cells to represent their
      size/address (aka the value is 4 GiB or greater).
 
 Required properties:
-- reg : The partition's offset and size within the mtd bank.
+- reg : The partition's offset and size within the flash
 
 Optional properties:
 - label : The label / name for this partition.  If omitted, the label is taken

diff --git a/Documentation/devicetree/bindings/net/gpmc-eth.txt b/Documentation/devicetree/bindings/net/gpmc-eth.txt
index ace4a64..f7da3d7 100644
--- a/Documentation/devicetree/bindings/net/gpmc-eth.txt
+++ b/Documentation/devicetree/bindings/net/gpmc-eth.txt

@@ -9,7 +9,7 @@
 
 All timing relevant properties as well as generic GPMC child properties are
 explained in a separate documents. Please refer to
-Documentation/devicetree/bindings/bus/ti-gpmc.txt
+Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt
 
 For the properties relevant to the ethernet controller connected to the GPMC
 refer to the binding documentation of the device. For example, the documentation
@@ -43,7 +43,7 @@
 
 Optional properties:
 - gpmc,XXX		Additional GPMC timings and settings parameters. See
-			Documentation/devicetree/bindings/bus/ti-gpmc.txt
+			Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt
 
 Example:
 

diff --git a/Documentation/devicetree/bindings/pwm/pwm-meson.txt b/Documentation/devicetree/bindings/pwm/pwm-meson.txt
index 5376a44..5b07beb 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-meson.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-meson.txt

@@ -2,7 +2,9 @@
 ============================
 
 Required properties:
-- compatible: Shall contain "amlogic,meson8b-pwm" or "amlogic,meson-gxbb-pwm".
+- compatible: Shall contain "amlogic,meson8b-pwm"
+                         or "amlogic,meson-gxbb-pwm"
+                         or "amlogic,meson-gxbb-ao-pwm"
 - #pwm-cells: Should be 3. See pwm.txt in this directory for a description of
   the cells format.
 

diff --git a/Documentation/devicetree/bindings/pwm/pwm-stm32.txt b/Documentation/devicetree/bindings/pwm/pwm-stm32.txt
index 6dd0403..3e6d550 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-stm32.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-stm32.txt

@@ -24,7 +24,7 @@
 		compatible = "st,stm32-timers";
 		reg = <0x40010000 0x400>;
 		clocks = <&rcc 0 160>;
-		clock-names = "clk_int";
+		clock-names = "int";
 
 		pwm {
 			compatible = "st,stm32-pwm";

diff --git a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
index d6de643..7e94b80 100644
--- a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
+++ b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt

@@ -8,6 +8,7 @@
  - "renesas,pwm-r8a7791": for R-Car M2-W
  - "renesas,pwm-r8a7794": for R-Car E2
  - "renesas,pwm-r8a7795": for R-Car H3
+ - "renesas,pwm-r8a7796": for R-Car M3-W
 - reg: base address and length of the registers block for the PWM.
 - #pwm-cells: should be 2. See pwm.txt in this directory for a description of
   the cells format.

diff --git a/Documentation/devicetree/bindings/rtc/brcm,brcmstb-waketimer.txt b/Documentation/devicetree/bindings/rtc/brcm,brcmstb-waketimer.txt
new file mode 100644
index 0000000..1d990bc
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/brcm,brcmstb-waketimer.txt

@@ -0,0 +1,22 @@
+Broadcom STB wake-up Timer
+
+The Broadcom STB wake-up timer provides a 27 MHz resolution timer, with the
+ability to wake up the system from low-power suspend/standby modes.
+
+Required properties:
+- compatible     : should contain "brcm,brcmstb-waketimer"
+- reg            : the register start and length for the WKTMR block
+- interrupts     : The TIMER interrupt
+- interrupt-parent: The phandle to the Always-On (AON) Power Management (PM) L2
+                    interrupt controller node
+- clocks	 : The phandle to the UPG fixed clock (27 MHz domain)
+
+Example:
+
+waketimer@f0411580 {
+	compatible = "brcm,brcmstb-waketimer";
+	reg = <0xf0411580 0x14>;
+	interrupts = <0x3>;
+	interrupt-parent = <&aon_pm_l2_intc>;
+	clocks = <&upg_fixed>;
+};

diff --git a/Documentation/devicetree/bindings/rtc/cortina,gemini.txt b/Documentation/devicetree/bindings/rtc/cortina,gemini.txt
deleted file mode 100644
index 4ce4e79..0000000
--- a/Documentation/devicetree/bindings/rtc/cortina,gemini.txt
+++ /dev/null

@@ -1,14 +0,0 @@
-* Cortina Systems Gemini RTC
-
-Gemini SoC real-time clock.
-
-Required properties:
-- compatible : Should be "cortina,gemini-rtc"
-
-Examples:
-
-rtc@45000000 {
-	compatible = "cortina,gemini-rtc";
-	reg = <0x45000000 0x100>;
-	interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
-};

diff --git a/Documentation/devicetree/bindings/rtc/faraday,ftrtc010.txt b/Documentation/devicetree/bindings/rtc/faraday,ftrtc010.txt
new file mode 100644
index 0000000..e3938f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/faraday,ftrtc010.txt

@@ -0,0 +1,28 @@
+* Faraday Technology FTRTC010 Real Time Clock
+
+This RTC appears in for example the Storlink Gemini family of
+SoCs.
+
+Required properties:
+- compatible : Should be one of:
+  "faraday,ftrtc010"
+  "cortina,gemini-rtc", "faraday,ftrtc010"
+
+Optional properties:
+- clocks: when present should contain clock references to the
+  PCLK and EXTCLK clocks. Faraday calls the latter CLK1HZ and
+  says the clock should be 1 Hz, but implementers actually seem
+  to choose different clocks here, like Cortina who chose
+  32768 Hz (a typical low-power clock).
+- clock-names: should name the clocks "PCLK" and "EXTCLK"
+  respectively.
+
+Examples:
+
+rtc@45000000 {
+	compatible = "cortina,gemini-rtc";
+	reg = <0x45000000 0x100>;
+	interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
+	clocks = <&foo 0>, <&foo 1>;
+	clock-names = "PCLK", "EXTCLK";
+};

diff --git a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.txt b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.txt
index e2837b9..0a4c371 100644
--- a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.txt
+++ b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.txt

@@ -1,17 +1,25 @@
 STM32 Real Time Clock
 
 Required properties:
-- compatible: "st,stm32-rtc".
+- compatible: can be either "st,stm32-rtc" or "st,stm32h7-rtc", depending on
+  whether the device is compatible with stm32(f4/f7) or stm32h7.
 - reg: address range of rtc register set.
-- clocks: reference to the clock entry ck_rtc.
+- clocks: can use up to two clocks, depending on part used:
+  - "rtc_ck": RTC clock source.
+    It is required on stm32(f4/f7) and stm32h7.
+  - "pclk": RTC APB interface clock.
+    It is not present on stm32(f4/f7).
+    It is required on stm32h7.
+- clock-names: must be "rtc_ck" and "pclk".
+    It is required only on stm32h7.
 - interrupt-parent: phandle for the interrupt controller.
 - interrupts: rtc alarm interrupt.
 - st,syscfg: phandle for pwrcfg, mandatory to disable/enable backup domain
   (RTC registers) write protection.
 
-Optional properties (to override default ck_rtc parent clock):
-- assigned-clocks: reference to the ck_rtc clock entry.
-- assigned-clock-parents: phandle of the new parent clock of ck_rtc.
+Optional properties (to override default rtc_ck parent clock):
+- assigned-clocks: reference to the rtc_ck clock entry.
+- assigned-clock-parents: phandle of the new parent clock of rtc_ck.
 
 Example:
 
@@ -25,3 +33,17 @@
 		interrupts = <17 1>;
 		st,syscfg = <&pwrcfg>;
 	};
+
+	rtc: rtc@58004000 {
+		compatible = "st,stm32h7-rtc";
+		reg = <0x58004000 0x400>;
+		clocks = <&rcc RTCAPB_CK>, <&rcc RTC_CK>;
+		clock-names = "pclk", "rtc_ck";
+		assigned-clocks = <&rcc RTC_CK>;
+		assigned-clock-parents = <&rcc LSE_CK>;
+		interrupt-parent = <&exti>;
+		interrupts = <17 1>;
+		interrupt-names = "alarm";
+		st,syscfg = <&pwrcfg>;
+		status = "disabled";
+	};

diff --git a/Documentation/devicetree/bindings/watchdog/da9062-wdt.txt b/Documentation/devicetree/bindings/watchdog/da9062-wdt.txt
new file mode 100644
index 0000000..b935b52
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/da9062-wdt.txt

@@ -0,0 +1,23 @@
+* Dialog Semiconductor DA9062/61 Watchdog Timer
+
+Required properties:
+
+- compatible: should be one of the following valid compatible string lines:
+	"dlg,da9061-watchdog", "dlg,da9062-watchdog"
+	"dlg,da9062-watchdog"
+
+Example: DA9062
+
+	pmic0: da9062@58 {
+		watchdog {
+			compatible = "dlg,da9062-watchdog";
+		};
+	};
+
+Example: DA9061 using a fall-back compatible for the DA9062 watchdog driver
+
+	pmic0: da9061@58 {
+		watchdog {
+			compatible = "dlg,da9061-watchdog", "dlg,da9062-watchdog";
+		};
+	};

diff --git a/Documentation/devicetree/bindings/watchdog/dw_wdt.txt b/Documentation/devicetree/bindings/watchdog/dw_wdt.txt
index 08e16f6..eb09144 100644
--- a/Documentation/devicetree/bindings/watchdog/dw_wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/dw_wdt.txt

@@ -10,6 +10,8 @@
 Optional Properties:
 
 - interrupts	: The interrupt used for the watchdog timeout warning.
+- resets	: phandle pointing to the system reset controller with
+		line index for the watchdog.
 
 Example:
 
@@ -18,4 +20,5 @@
 		reg = <0xffd02000 0x1000>;
 		interrupts = <0 171 4>;
 		clocks = <&per_base_clk>;
+		resets = <&rst WDT0_RESET>;
 	};

diff --git a/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt b/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt
index da24e31..9e306af 100644
--- a/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt

@@ -2,10 +2,11 @@
 
 Required properties:
 - compatible : Should be "renesas,<soctype>-wdt", and
-	       "renesas,rcar-gen3-wdt" as fallback.
+	       "renesas,rcar-gen3-wdt" or "renesas,rza-wdt" as fallback.
 	       Examples with soctypes are:
 	         - "renesas,r8a7795-wdt" (R-Car H3)
 	         - "renesas,r8a7796-wdt" (R-Car M3-W)
+	         - "renesas,r7s72100-wdt" (RZ/A1)
 
   When compatible with the generic version, nodes must list the SoC-specific
   version corresponding to the platform first, followed by the generic
@@ -17,6 +18,7 @@
 Optional properties:
 - timeout-sec : Contains the watchdog timeout in seconds
 - power-domains : the power domain the WDT belongs to
+- interrupts: Some WDTs have an interrupt when used in interval timer mode
 
 Examples:
 

diff --git a/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.txt b/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.txt
new file mode 100644
index 0000000..cc13b10a
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.txt

@@ -0,0 +1,19 @@
+STM32 Independent WatchDoG (IWDG)
+---------------------------------
+
+Required properties:
+- compatible: "st,stm32-iwdg"
+- reg: physical base address and length of the registers set for the device
+- clocks: must contain a single entry describing the clock input
+
+Optional Properties:
+- timeout-sec: Watchdog timeout value in seconds.
+
+Example:
+
+iwdg: watchdog@40003000 {
+	compatible = "st,stm32-iwdg";
+	reg = <0x40003000 0x400>;
+	clocks = <&clk_lsi>;
+	timeout-sec = <32>;
+};

diff --git a/Documentation/devicetree/bindings/watchdog/uniphier-wdt.txt b/Documentation/devicetree/bindings/watchdog/uniphier-wdt.txt
new file mode 100644
index 0000000..bf63375
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/uniphier-wdt.txt

@@ -0,0 +1,20 @@
+UniPhier watchdog timer controller
+
+This UniPhier watchdog timer controller must be under sysctrl node.
+
+Required properties:
+- compatible: should be "socionext,uniphier-wdt"
+
+Example:
+
+	sysctrl@61840000 {
+		compatible = "socionext,uniphier-ld11-sysctrl",
+			     "simple-mfd", "syscon";
+		reg = <0x61840000 0x4000>;
+
+		watchdog {
+			compatible = "socionext,uniphier-wdt";
+		};
+
+		other nodes ...
+	};

diff --git a/Documentation/driver-api/basics.rst b/Documentation/driver-api/basics.rst
index 472e7a6..ab82250 100644
--- a/Documentation/driver-api/basics.rst
+++ b/Documentation/driver-api/basics.rst

@@ -106,9 +106,6 @@
 .. kernel-doc:: kernel/sys.c
    :export:
 
-.. kernel-doc:: kernel/rcu/srcu.c
-   :export:
-
 .. kernel-doc:: kernel/rcu/tree.c
    :export:
 

diff --git a/Documentation/driver-api/i2c.rst b/Documentation/driver-api/i2c.rst
index 0bf86a4..7582c07 100644
--- a/Documentation/driver-api/i2c.rst
+++ b/Documentation/driver-api/i2c.rst

@@ -41,5 +41,8 @@
 .. kernel-doc:: drivers/i2c/i2c-boardinfo.c
    :functions: i2c_register_board_info
 
-.. kernel-doc:: drivers/i2c/i2c-core.c
+.. kernel-doc:: drivers/i2c/i2c-core-base.c
+   :export:
+
+.. kernel-doc:: drivers/i2c/i2c-core-smbus.c
    :export:

diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 415484f3..192d8cb 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt

@@ -134,6 +134,22 @@
 	fail_futex=
 	mmc_core.fail_request=<interval>,<probability>,<space>,<times>
 
+o proc entries
+
+- /proc/self/task/<current-tid>/fail-nth:
+
+	Writing an integer N to this file makes the N-th call in the current task
+	fail (N is 0-based). Reading from this file returns a single char 'Y' or
+	'N' that says whether the fault set up by a previous write to this file
+	was injected, and disables the fault if it wasn't yet injected.
+	Note that this file enables all types of faults (slab, futex, etc).
+	This setting takes precedence over all other generic debugfs settings
+	like probability, interval, times, etc. But per-capability settings
+	(e.g. fail_futex/ignore-private) take precedence over it.
+
+	This feature is intended for systematic testing of faults in a single
+	system call. See an example below.
+
 How to add new fault injection capability
 -----------------------------------------
 
@@ -278,3 +294,65 @@
 	# env FAILCMD_TYPE=fail_page_alloc \
 		./tools/testing/fault-injection/failcmd.sh --times=100 \
                 -- make -C tools/testing/selftests/ run_tests
+
+Systematic faults using fail-nth
+---------------------------------
+
+The following code systematically faults 0-th, 1-st, 2-nd and so on
+capabilities in the socketpair() system call.
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+
+int main()
+{
+	int i, err, res, fail_nth, fds[2];
+	char buf[128];
+
+	system("echo N > /sys/kernel/debug/failslab/ignore-gfp-wait");
+	sprintf(buf, "/proc/self/task/%ld/fail-nth", syscall(SYS_gettid));
+	fail_nth = open(buf, O_RDWR);
+	for (i = 0;; i++) {
+		sprintf(buf, "%d", i);
+		write(fail_nth, buf, strlen(buf));
+		res = socketpair(AF_LOCAL, SOCK_STREAM, 0, fds);
+		err = errno;
+		read(fail_nth, buf, 1);
+		if (res == 0) {
+			close(fds[0]);
+			close(fds[1]);
+		}
+		printf("%d-th fault %c: res=%d/%d\n", i, buf[0], res, err);
+		if (buf[0] != 'Y')
+			break;
+	}
+	return 0;
+}
+
+An example output:
+
+0-th fault Y: res=-1/23
+1-th fault Y: res=-1/23
+2-th fault Y: res=-1/23
+3-th fault Y: res=-1/12
+4-th fault Y: res=-1/12
+5-th fault Y: res=-1/23
+6-th fault Y: res=-1/23
+7-th fault Y: res=-1/23
+8-th fault Y: res=-1/12
+9-th fault Y: res=-1/12
+10-th fault Y: res=-1/12
+11-th fault Y: res=-1/12
+12-th fault Y: res=-1/12
+13-th fault Y: res=-1/12
+14-th fault Y: res=-1/12
+15-th fault Y: res=-1/12
+16-th fault N: res=0/12

diff --git a/Documentation/filesystems/autofs4.txt b/Documentation/filesystems/autofs4.txt
index 8444dc3..f10dd59 100644
--- a/Documentation/filesystems/autofs4.txt
+++ b/Documentation/filesystems/autofs4.txt

@@ -316,7 +316,7 @@
         struct autofs_v5_packet {
                 int proto_version;                /* Protocol version */
                 int type;                        /* Type of packet */
-                autofs_wqt_t wait_queue_entry_token;
+                autofs_wqt_t wait_queue_token;
                 __u32 dev;
                 __u64 ino;
                 __u32 uid;
@@ -341,12 +341,12 @@
 `O_DIRECT`) to _pipe2(2)_ so that a read from the pipe will return at
 most one packet, and any unread portion of a packet will be discarded.
 
-The `wait_queue_entry_token` is a unique number which can identify a
+The `wait_queue_token` is a unique number which can identify a
 particular request to be acknowledged.  When a message is sent over
 the pipe the affected dentry is marked as either "active" or
 "expiring" and other accesses to it block until the message is
 acknowledged using one of the ioctls below and the relevant
-`wait_queue_entry_token`.
+`wait_queue_token`.
 
 Communicating with autofs: root directory ioctls
 ------------------------------------------------
@@ -358,7 +358,7 @@
 The available ioctl commands are:
 
 - **AUTOFS_IOC_READY**: a notification has been handled.  The argument
-    to the ioctl command is the "wait_queue_entry_token" number
+    to the ioctl command is the "wait_queue_token" number
     corresponding to the notification being acknowledged.
 - **AUTOFS_IOC_FAIL**: similar to above, but indicates failure with
     the error code `ENOENT`.
@@ -382,14 +382,14 @@
         struct autofs_packet_expire_multi {
                 int proto_version;              /* Protocol version */
                 int type;                       /* Type of packet */
-                autofs_wqt_t wait_queue_entry_token;
+                autofs_wqt_t wait_queue_token;
                 int len;
                 char name[NAME_MAX+1];
         };
 
      is required.  This is filled in with the name of something
      that can be unmounted or removed.  If nothing can be expired,
-     `errno` is set to `EAGAIN`.  Even though a `wait_queue_entry_token`
+     `errno` is set to `EAGAIN`.  Even though a `wait_queue_token`
      is present in the structure, no "wait queue" is established
      and no acknowledgment is needed.
 - **AUTOFS_IOC_EXPIRE_MULTI**:  This is similar to

diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 4f6531a..273ccb2 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt

@@ -155,11 +155,15 @@
                        enabled by default.
 data_flush             Enable data flushing before checkpoint in order to
                        persist data of regular and symlink.
+fault_injection=%d     Enable fault injection in all supported types with
+                       specified injection rate.
 mode=%s                Control block allocation mode which supports "adaptive"
                        and "lfs". In "lfs" mode, there should be no random
                        writes towards main area.
 io_bits=%u             Set the bit size of write IO requests. It should be set
                        with "mode=lfs".
+usrquota               Enable plain user disk quota accounting.
+grpquota               Enable plain group disk quota accounting.
 
 ================================================================================
 DEBUGFS ENTRIES

diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index c9e884b..36f528a 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt

@@ -201,6 +201,40 @@
 top, lower2 the middle and lower3 the bottom layer.
 
 
+Sharing and copying layers
+--------------------------
+
+Lower layers may be shared among several overlay mounts and that is indeed
+a very common practice.  An overlay mount may use the same lower layer
+path as another overlay mount and it may use a lower layer path that is
+beneath or above the path of another overlay lower layer path.
+
+Using an upper layer path and/or a workdir path that are already used by
+another overlay mount is not allowed and will fail with EBUSY.  Using
+partially overlapping paths is not allowed but will not fail with EBUSY.
+
+Mounting an overlay using an upper layer path, where the upper layer path
+was previously used by another mounted overlay in combination with a
+different lower layer path, is allowed, unless the "inodes index" feature
+is enabled.
+
+With the "inodes index" feature, on the first time mount, an NFS file
+handle of the lower layer root directory, along with the UUID of the lower
+filesystem, are encoded and stored in the "trusted.overlay.origin" extended
+attribute on the upper layer root directory.  On subsequent mount attempts,
+the lower root directory file handle and lower filesystem UUID are compared
+to the stored origin in upper root directory.  On failure to verify the
+lower root origin, mount will fail with ESTALE.  An overlayfs mount with
+"inodes index" enabled will fail with EOPNOTSUPP if the lower filesystem
+does not support NFS export, lower filesystem does not have a valid UUID or
+if the upper filesystem does not support extended attributes.
+
+It is quite a common practice to copy overlay layers to a different
+directory tree on the same or different underlying filesystem, and even
+to a different machine.  With the "inodes index" feature, trying to mount
+the copied layers will fail the verification of the lower root file handle.
+
+
 Non-standard behavior
 ---------------------
 

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 4cddbce..adba21b 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt

@@ -1786,12 +1786,16 @@
 	pos:	0
 	flags:	02
 	mnt_id:	9
-	tfd:        5 events:       1d data: ffffffffffffffff
+	tfd:        5 events:       1d data: ffffffffffffffff pos:0 ino:61af sdev:7
 
 	where 'tfd' is a target file descriptor number in decimal form,
 	'events' is events mask being watched and the 'data' is data
 	associated with a target [see epoll(7) for more details].
 
+	The 'pos' is current offset of the target file in decimal form
+	[see lseek(2)], 'ino' and 'sdev' are inode and device numbers
+	where target file resides, all in hex format.
+
 	Fsnotify files
 	~~~~~~~~~~~~~~
 	For inotify files the format is the following

diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801
index 820d904..0500193 100644
--- a/Documentation/i2c/busses/i2c-i801
+++ b/Documentation/i2c/busses/i2c-i801

@@ -34,6 +34,8 @@
   * Intel Broxton (SOC)
   * Intel Lewisburg (PCH)
   * Intel Gemini Lake (SOC)
+  * Intel Cannon Lake-H (PCH)
+  * Intel Cannon Lake-LP (PCH)
    Datasheets: Publicly available at the Intel website
 
 On Intel Patsburg and later chipsets, both the normal host SMBus controller

diff --git a/Documentation/i2c/dev-interface b/Documentation/i2c/dev-interface
index bcf919d..5ff1944 100644
--- a/Documentation/i2c/dev-interface
+++ b/Documentation/i2c/dev-interface

@@ -191,7 +191,7 @@
 4* Other ioctl() calls are converted to in-kernel function calls by
 i2c-dev. Examples include I2C_FUNCS, which queries the I2C adapter
 functionality using i2c.h:i2c_get_functionality(), and I2C_SMBUS, which
-performs an SMBus transaction using i2c-core.c:i2c_smbus_xfer().
+performs an SMBus transaction using i2c-core-smbus.c:i2c_smbus_xfer().
 
 The i2c-dev driver is responsible for checking all the parameters that
 come from user-space for validity. After this point, there is no
@@ -200,13 +200,13 @@
 directly. This means that I2C bus drivers don't need to implement
 anything special to support access from user-space.
 
-5* These i2c-core.c/i2c.h functions are wrappers to the actual
-implementation of your I2C bus driver. Each adapter must declare
-callback functions implementing these standard calls.
-i2c.h:i2c_get_functionality() calls i2c_adapter.algo->functionality(),
-while i2c-core.c:i2c_smbus_xfer() calls either
+5* These i2c.h functions are wrappers to the actual implementation of
+your I2C bus driver. Each adapter must declare callback functions
+implementing these standard calls. i2c.h:i2c_get_functionality() calls
+i2c_adapter.algo->functionality(), while
+i2c-core-smbus.c:i2c_smbus_xfer() calls either
 adapter.algo->smbus_xfer() if it is implemented, or if not,
-i2c-core.c:i2c_smbus_xfer_emulated() which in turn calls
+i2c-core-smbus.c:i2c_smbus_xfer_emulated() which in turn calls
 i2c_adapter.algo->master_xfer().
 
 After your I2C bus driver has processed these requests, execution runs

diff --git a/Documentation/input/index.rst b/Documentation/input/index.rst
index 7a3e71c..9888f5c 100644
--- a/Documentation/input/index.rst
+++ b/Documentation/input/index.rst

@@ -6,7 +6,6 @@
 
 .. toctree::
    :maxdepth: 2
-   :numbered:
 
    input_uapi
    input_kapi

diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 615434d..5181445 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt

@@ -112,8 +112,8 @@
 2) Or use the system kernel binary itself as dump-capture kernel and there is
    no need to build a separate dump-capture kernel. This is possible
    only with the architectures which support a relocatable kernel. As
-   of today, i386, x86_64, ppc64, ia64 and arm architectures support relocatable
-   kernel.
+   of today, i386, x86_64, ppc64, ia64, arm and arm64 architectures support
+   relocatable kernel.
 
 Building a relocatable kernel is advantageous from the point of view that
 one does not have to build a second kernel for capturing the dump. But
@@ -339,7 +339,7 @@
 For arm64:
 	- Use vmlinux or Image
 
-If you are using a uncompressed vmlinux image then use following command
+If you are using an uncompressed vmlinux image then use following command
 to load dump-capture kernel.
 
    kexec -p <dump-capture-kernel-vmlinux-image> \
@@ -361,6 +361,12 @@
    --dtb=<dtb-for-dump-capture-kernel> \
    --append="root=<root-dev> <arch-specific-options>"
 
+If you are using an uncompressed Image, then use following command
+to load dump-capture kernel.
+
+   kexec -p <dump-capture-kernel-Image> \
+   --initrd=<initrd-for-dump-capture-kernel> \
+   --append="root=<root-dev> <arch-specific-options>"
 
 Please note, that --args-linux does not need to be specified for ia64.
 It is planned to make this a no-op on that architecture, but for now

diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index c239a0c..c4ddfcd 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt

@@ -1876,8 +1876,8 @@
      This makes sure that the death mark on the object is perceived to be set
      *before* the reference counter is decremented.
 
-     See Documentation/atomic_ops.txt for more information.  See the "Atomic
-     operations" subsection for information on where to use these.
+     See Documentation/core-api/atomic_ops.rst for more information.  See the
+     "Atomic operations" subsection for information on where to use these.
 
 
  (*) lockless_dereference();
@@ -2584,7 +2584,7 @@
 barriers, and so barrier instructions are superfluous in conjunction with them,
 and in such cases the special barrier primitives will be no-ops.
 
-See Documentation/atomic_ops.txt for more information.
+See Documentation/core-api/atomic_ops.rst for more information.
 
 
 ACCESSING DEVICES

diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index 670f3de..5c628e1 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt

@@ -282,20 +282,26 @@
 % echo online > /sys/devices/system/memory/memoryXXX/state
 
 This onlining will not change the ZONE type of the target memory block,
-If the memory block is in ZONE_NORMAL, you can change it to ZONE_MOVABLE:
+If the memory block doesn't belong to any zone an appropriate kernel zone
+(usually ZONE_NORMAL) will be used unless movable_node kernel command line
+option is specified, in which case ZONE_MOVABLE will be used.
+
+You can explicitly request to associate it with ZONE_MOVABLE by
 
 % echo online_movable > /sys/devices/system/memory/memoryXXX/state
 (NOTE: current limit: this memory block must be adjacent to ZONE_MOVABLE)
 
-And if the memory block is in ZONE_MOVABLE, you can change it to ZONE_NORMAL:
+Or you can explicitly request a kernel zone (usually ZONE_NORMAL) by:
 
 % echo online_kernel > /sys/devices/system/memory/memoryXXX/state
 (NOTE: current limit: this memory block must be adjacent to ZONE_NORMAL)
 
+An explicit zone onlining can fail (e.g. when the range is already within
+an existing and incompatible zone).
+
 After this, memory block XXX's state will be 'online' and the amount of
 available memory will be increased.
 
-Currently, newly added memory is added as ZONE_NORMAL (for powerpc, ZONE_DMA).
 This may be changed in future.
 
 

diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
index 196ba17..1be0b6f 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt

@@ -44,8 +44,7 @@
 Supports multiple types of timestamp requests. As a result, this
 socket option takes a bitmap of flags, not a boolean. In
 
-  err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (void *) val,
-                   sizeof(val));
+  err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
 
 val is an integer with any of the following bits set. Setting other
 bit returns EINVAL and does not change the current state.
@@ -249,8 +248,7 @@
 
   __u32 val = SOF_TIMESTAMPING_SOFTWARE |
 	      SOF_TIMESTAMPING_OPT_ID /* or any other flag */;
-  err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (void *) val,
-                   sizeof(val));
+  err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
 
 
 1.4 Bytestream Timestamps

diff --git a/Documentation/ntb.txt b/Documentation/ntb.txt
index 1d9bbab..a5af4f0 100644
--- a/Documentation/ntb.txt
+++ b/Documentation/ntb.txt

@@ -1,14 +1,16 @@
 # NTB Drivers
 
 NTB (Non-Transparent Bridge) is a type of PCI-Express bridge chip that connects
-the separate memory systems of two computers to the same PCI-Express fabric.
-Existing NTB hardware supports a common feature set, including scratchpad
-registers, doorbell registers, and memory translation windows.  Scratchpad
-registers are read-and-writable registers that are accessible from either side
-of the device, so that peers can exchange a small amount of information at a
-fixed address.  Doorbell registers provide a way for peers to send interrupt
-events.  Memory windows allow translated read and write access to the peer
-memory.
+the separate memory systems of two or more computers to the same PCI-Express
+fabric. Existing NTB hardware supports a common feature set: doorbell
+registers and memory translation windows, as well as non-common features like
+scratchpad and message registers. Scratchpad registers are read-and-writable
+registers that are accessible from either side of the device, so that peers can
+exchange a small amount of information at a fixed address. Message registers can
+be utilized for the same purpose. Additionally they are provided with
+special status bits to make sure the information isn't rewritten by another
+peer. Doorbell registers provide a way for peers to send interrupt events.
+Memory windows allow translated read and write access to the peer memory.
 
 ## NTB Core Driver (ntb)
 
@@ -26,6 +28,87 @@
 registration uses the Linux Device framework, so it should feel familiar to
 anyone who has written a pci driver.
 
+### NTB Typical client driver implementation
+
+The primary purpose of NTB is to share some piece of memory between at least two
+systems. So the NTB device features like Scratchpad/Message registers are
+mainly used to perform the proper memory window initialization. Typically
+there are two types of memory window interfaces supported by the NTB API:
+inbound translation configured on the local ntb port and outbound translation
+configured by the peer, on the peer ntb port. The first type is
+depicted in the next figure:
+
+Inbound translation:
+ Memory:              Local NTB Port:      Peer NTB Port:      Peer MMIO:
+  ____________
+ | dma-mapped |-ntb_mw_set_trans(addr)  |
+ | memory     |        _v____________   |   ______________
+ | (addr)     |<======| MW xlat addr |<====| MW base addr |<== memory-mapped IO
+ |------------|       |--------------|  |  |--------------|
+
+So a typical scenario of the first type of memory window initialization is:
+1) allocate a memory region, 2) put translated address to NTB config,
+3) somehow notify a peer device of performed initialization, 4) peer device
+maps corresponding outbound memory window so as to have access to the shared
+memory region.
+
+The second type of interface, that implies the shared windows being
+initialized by a peer device, is depicted on the figure:
+
+Outbound translation:
+ Memory:        Local NTB Port:    Peer NTB Port:      Peer MMIO:
+  ____________                      ______________
+ | dma-mapped |                |   | MW base addr |<== memory-mapped IO
+ | memory     |                |   |--------------|
+ | (addr)     |<===================| MW xlat addr |<-ntb_peer_mw_set_trans(addr)
+ |------------|                |   |--------------|
+
+Typical scenario of the second type interface initialization would be:
+1) allocate a memory region, 2) somehow deliver a translated address to a peer
+device, 3) peer puts the translated address to NTB config, 4) peer device maps
+outbound memory window so to have access to the shared memory region.
+
+As one can see the described scenarios can be combined in one portable
+algorithm.
+ Local device:
+  1) Allocate memory for a shared window
+  2) Initialize memory window by translated address of the allocated region
+     (it may fail if local memory window initialization is unsupported)
+  3) Send the translated address and memory window index to a peer device
+ Peer device:
+  1) Initialize memory window with retrieved address of the allocated
+     by another device memory region (it may fail if peer memory window
+     initialization is unsupported)
+  2) Map outbound memory window
+
+In accordance with this scenario, the NTB Memory Window API can be used as
+follows:
+ Local device:
+  1) ntb_mw_count(pidx) - retrieve number of memory ranges, which can
+     be allocated for memory windows between local device and peer device
+     of port with specified index.
+  2) ntb_mw_get_align(pidx, midx) - retrieve parameters restricting the
+     shared memory region alignment and size. Then memory can be properly
+     allocated.
+  3) Allocate physically contiguous memory region in compliance with
+     restrictions retrieved in 2).
+  4) ntb_mw_set_trans(pidx, midx) - try to set translation address of
+     the memory window with specified index for the defined peer device
+     (it may fail if local translated address setting is not supported)
+  5) Send translated base address (usually together with memory window
+     number) to the peer device using, for instance, scratchpad or message
+     registers.
+ Peer device:
+  1) ntb_peer_mw_set_trans(pidx, midx) - try to set received from other
+     device (related to pidx) translated address for specified memory
+     window. It may fail if retrieved address, for instance, exceeds
+     maximum possible address or isn't properly aligned.
+  2) ntb_peer_mw_get_addr(widx) - retrieve MMIO address to map the memory
+     window so to have an access to the shared memory.
+
+It is also worth noting that the method ntb_mw_count(pidx) should return the
+same value as ntb_peer_mw_count() on the peer with port index pidx.
+
 ### NTB Transport Client (ntb\_transport) and NTB Netdev (ntb\_netdev)
 
 The primary client for NTB is the Transport client, used in tandem with NTB

diff --git a/Documentation/pwm.txt b/Documentation/pwm.txt
index 789b27c..8fbf0aa 100644
--- a/Documentation/pwm.txt
+++ b/Documentation/pwm.txt

@@ -1,4 +1,6 @@
+======================================
 Pulse Width Modulation (PWM) interface
+======================================
 
 This provides an overview about the Linux PWM interface
 
@@ -16,7 +18,7 @@
 
 Instead of referring to a PWM device via its unique ID, board setup code
 should instead register a static mapping that can be used to match PWM
-consumers to providers, as given in the following example:
+consumers to providers, as given in the following example::
 
 	static struct pwm_lookup board_pwm_lookup[] = {
 		PWM_LOOKUP("tegra-pwm", 0, "pwm-backlight", NULL,
@@ -40,9 +42,9 @@
 device or a consumer name. pwm_put() is used to free the PWM device. Managed
 variants of these functions, devm_pwm_get() and devm_pwm_put(), also exist.
 
-After being requested, a PWM has to be configured using:
+After being requested, a PWM has to be configured using::
 
-int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state);
+	int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state);
 
 This API controls both the PWM period/duty_cycle config and the
 enable/disable state.
@@ -72,11 +74,14 @@
 pwmchipN, where N is the base of the PWM chip. Inside the directory you
 will find:
 
-npwm - The number of PWM channels this chip supports (read-only).
+  npwm
+    The number of PWM channels this chip supports (read-only).
 
-export - Exports a PWM channel for use with sysfs (write-only).
+  export
+    Exports a PWM channel for use with sysfs (write-only).
 
-unexport - Unexports a PWM channel from sysfs (write-only).
+  unexport
+    Unexports a PWM channel from sysfs (write-only).
 
 The PWM channels are numbered using a per-chip index from 0 to npwm-1.
 
@@ -84,21 +89,26 @@
 pwmchipN directory it is associated with, where X is the number of the
 channel that was exported. The following properties will then be available:
 
-period - The total period of the PWM signal (read/write).
-	Value is in nanoseconds and is the sum of the active and inactive
-	time of the PWM.
+  period
+    The total period of the PWM signal (read/write).
+    Value is in nanoseconds and is the sum of the active and inactive
+    time of the PWM.
 
-duty_cycle - The active time of the PWM signal (read/write).
-	Value is in nanoseconds and must be less than the period.
+  duty_cycle
+    The active time of the PWM signal (read/write).
+    Value is in nanoseconds and must be less than the period.
 
-polarity - Changes the polarity of the PWM signal (read/write).
-	Writes to this property only work if the PWM chip supports changing
-	the polarity. The polarity can only be changed if the PWM is not
-	enabled. Value is the string "normal" or "inversed".
+  polarity
+    Changes the polarity of the PWM signal (read/write).
+    Writes to this property only work if the PWM chip supports changing
+    the polarity. The polarity can only be changed if the PWM is not
+    enabled. Value is the string "normal" or "inversed".
 
-enable - Enable/disable the PWM signal (read/write).
-	0 - disabled
-	1 - enabled
+  enable
+    Enable/disable the PWM signal (read/write).
+
+	- 0 - disabled
+	- 1 - enabled
 
 Implementing a PWM driver
 -------------------------

diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
index ddc3660..c0c9774 100644
--- a/Documentation/rtc.txt
+++ b/Documentation/rtc.txt

@@ -1,6 +1,6 @@
-
-	Real Time Clock (RTC) Drivers for Linux
-	=======================================
+=======================================
+Real Time Clock (RTC) Drivers for Linux
+=======================================
 
 When Linux developers talk about a "Real Time Clock", they usually mean
 something that tracks wall clock time and is battery backed so that it
@@ -32,8 +32,8 @@
 be able to schedule one any time in the upcoming century.
 
 
-	Old PC/AT-Compatible driver:  /dev/rtc
-	--------------------------------------
+Old PC/AT-Compatible driver:  /dev/rtc
+--------------------------------------
 
 All PCs (even Alpha machines) have a Real Time Clock built into them.
 Usually they are built into the chipset of the computer, but some may
@@ -105,8 +105,8 @@
 (The original /dev/rtc driver was written by Paul Gortmaker.)
 
 
-	New portable "RTC Class" drivers:  /dev/rtcN
-	--------------------------------------------
+New portable "RTC Class" drivers:  /dev/rtcN
+--------------------------------------------
 
 Because Linux supports many non-ACPI and non-PC platforms, some of which
 have more than one RTC style clock, it needed a more portable solution
@@ -136,35 +136,39 @@
 the system clock from the discrete RTC, but use the integrated one for all
 other tasks, because of its greater functionality.
 
-SYSFS INTERFACE
+SYSFS interface
 ---------------
 
 The sysfs interface under /sys/class/rtc/rtcN provides access to various
 rtc attributes without requiring the use of ioctls. All dates and times
 are in the RTC's timezone, rather than in system time.
 
-date:  	   	 RTC-provided date
-hctosys:   	 1 if the RTC provided the system time at boot via the
+================ ==============================================================
+date  	   	 RTC-provided date
+hctosys   	 1 if the RTC provided the system time at boot via the
 		 CONFIG_RTC_HCTOSYS kernel option, 0 otherwise
-max_user_freq:	 The maximum interrupt rate an unprivileged user may request
+max_user_freq	 The maximum interrupt rate an unprivileged user may request
 		 from this RTC.
-name:		 The name of the RTC corresponding to this sysfs directory
-since_epoch:	 The number of seconds since the epoch according to the RTC
-time:		 RTC-provided time
-wakealarm:	 The time at which the clock will generate a system wakeup
+name		 The name of the RTC corresponding to this sysfs directory
+since_epoch	 The number of seconds since the epoch according to the RTC
+time		 RTC-provided time
+wakealarm	 The time at which the clock will generate a system wakeup
 		 event. This is a one shot wakeup event, so must be reset
-		 after wake if a daily wakeup is required. Format is seconds since
-		 the epoch by default, or if there's a leading +, seconds in the
-		 future, or if there is a leading +=, seconds ahead of the current
-		 alarm.
-offset:		 The amount which the rtc clock has been adjusted in firmware.
+		 after wake if a daily wakeup is required. Format is seconds
+		 since the epoch by default, or if there's a leading +, seconds
+		 in the future, or if there is a leading +=, seconds ahead of
+		 the current alarm.
+offset		 The amount which the rtc clock has been adjusted in firmware.
 		 Visible only if the driver supports clock offset adjustment.
 		 The unit is parts per billion, i.e. The number of clock ticks
 		 which are added to or removed from the rtc's base clock per
 		 billion ticks. A positive value makes a day pass more slowly,
 		 longer, and a negative value makes a day pass more quickly.
+*/nvmem		 The non volatile storage exported as a raw file, as described
+		 in Documentation/nvmem/nvmem.txt
+================ ==============================================================
 
-IOCTL INTERFACE
+IOCTL interface
 ---------------
 
 The ioctl() calls supported by /dev/rtc are also supported by the RTC class

diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index 0d831a7..1648fa8 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst

@@ -894,6 +894,12 @@
      To apply a keyring restriction the process must have Set Attribute
      permission and the keyring must not be previously restricted.
 
+     One application of restricted keyrings is to verify X.509 certificate
+     chains or individual certificate signatures using the asymmetric key type.
+     See Documentation/crypto/asymmetric-keys.txt for specific restrictions
+     applicable to the asymmetric key type.
+
+
 Kernel Services
 ===============
 

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index b4ad97f..48244c4 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt

@@ -240,6 +240,26 @@
 
 ==============================================================
 
+highmem_is_dirtyable
+
+Available only for systems with CONFIG_HIGHMEM enabled (32-bit systems).
+
+This parameter controls whether the high memory is considered for dirty
+writers throttling.  This is not the case by default which means that
+only the amount of memory directly visible/usable by the kernel can
+be dirtied. As a result, on systems with a large amount of memory and
+lowmem basically depleted writers might be throttled too early and
+streaming writes can get very slow.
+
+Changing the value to non zero would allow more memory to be dirtied
+and thus allow writers to write more data which can be flushed to the
+storage more effectively. Note this also comes with a risk of premature
+OOM killer because some writers (e.g. direct block device writes) can
+only use the low memory and they can fill it up with dirty data without
+any throttling.
+
+==============================================================
+
 hugepages_treat_as_movable
 
 This parameter controls whether we can allocate hugepages from ZONE_MOVABLE

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index fff8ff6..d4601df 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt

@@ -5,10 +5,11 @@
    Author:   Steven Rostedt <srostedt@redhat.com>
   License:   The GNU Free Documentation License, Version 1.2
                (dual licensed under the GPL v2)
-Reviewers:   Elias Oltmanns, Randy Dunlap, Andrew Morton,
-	     John Kacur, and David Teigland.
+Original Reviewers:   Elias Oltmanns, Randy Dunlap, Andrew Morton,
+		      John Kacur, and David Teigland.
 Written for: 2.6.28-rc2
 Updated for: 3.10
+Updated for: 4.13 - Copyright 2017 VMware Inc. Steven Rostedt
 
 Introduction
 ------------
@@ -26,9 +27,11 @@
 
 One of the most common uses of ftrace is the event tracing.
 Through out the kernel is hundreds of static event points that
-can be enabled via the debugfs file system to see what is
+can be enabled via the tracefs file system to see what is
 going on in certain parts of the kernel.
 
+See events.txt for more information.
+
 
 Implementation Details
 ----------------------
@@ -39,34 +42,47 @@
 The File System
 ---------------
 
-Ftrace uses the debugfs file system to hold the control files as
+Ftrace uses the tracefs file system to hold the control files as
 well as the files to display output.
 
-When debugfs is configured into the kernel (which selecting any ftrace
-option will do) the directory /sys/kernel/debug will be created. To mount
+When tracefs is configured into the kernel (which selecting any ftrace
+option will do) the directory /sys/kernel/tracing will be created. To mount
 this directory, you can add to your /etc/fstab file:
 
- debugfs       /sys/kernel/debug          debugfs defaults        0       0
+ tracefs       /sys/kernel/tracing       tracefs defaults        0       0
 
 Or you can mount it at run time with:
 
- mount -t debugfs nodev /sys/kernel/debug
+ mount -t tracefs nodev /sys/kernel/tracing
 
 For quicker access to that directory you may want to make a soft link to
 it:
 
- ln -s /sys/kernel/debug /debug
+ ln -s /sys/kernel/tracing /tracing
 
-Any selected ftrace option will also create a directory called tracing
-within the debugfs. The rest of the document will assume that you are in
-the ftrace directory (cd /sys/kernel/debug/tracing) and will only concentrate
-on the files within that directory and not distract from the content with
-the extended "/sys/kernel/debug/tracing" path name.
+     *** NOTICE ***
+
+Before 4.1, all ftrace tracing control files were within the debugfs
+file system, which is typically located at /sys/kernel/debug/tracing.
+For backward compatibility, when mounting the debugfs file system,
+the tracefs file system will be automatically mounted at:
+
+ /sys/kernel/debug/tracing
+
+All files located in the tracefs file system will be located in that
+debugfs file system directory as well.
+
+     *** NOTICE ***
+
+Any selected ftrace option will also create the tracefs file system.
+The rest of the document will assume that you are in the ftrace directory
+(cd /sys/kernel/tracing) and will only concentrate on the files within that
+directory and not distract from the content with the extended
+"/sys/kernel/tracing" path name.
 
 That's it! (assuming that you have ftrace configured into your kernel)
 
-After mounting debugfs, you can see a directory called
-"tracing".  This directory contains the control and output files
+After mounting tracefs you will have access to the control and output files
 of ftrace. Here is a list of some of the key files:
 
 
@@ -92,10 +108,20 @@
 	writing to the ring buffer, the tracing overhead may
 	still be occurring.
 
+	The kernel function tracing_off() can be used within the
+	kernel to disable writing to the ring buffer, which will
+	set this file to "0". User space can re-enable tracing by
+	echoing "1" into the file.
+
+	Note, the function and event trigger "traceoff" will also
+	set this file to zero and stop tracing. Tracing can then be
+	re-enabled by user space using this file.
+
   trace:
 
 	This file holds the output of the trace in a human
-	readable format (described below).
+	readable format (described below). Note, tracing is temporarily
+	disabled while this file is being read (opened).
 
   trace_pipe:
 
@@ -109,7 +135,8 @@
 	will not be read again with a sequential read. The
 	"trace" file is static, and if the tracer is not
 	adding more data, it will display the same
-	information every time it is read.
+	information every time it is read. This file will not
+	disable tracing while being read.
 
   trace_options:
 
@@ -128,12 +155,14 @@
   tracing_max_latency:
 
 	Some of the tracers record the max latency.
-	For example, the time interrupts are disabled.
-	This time is saved in this file. The max trace
-	will also be stored, and displayed by "trace".
-	A new max trace will only be recorded if the
-	latency is greater than the value in this
-	file. (in microseconds)
+	For example, the maximum time that interrupts are disabled.
+	The maximum time is saved in this file. The max trace will also be
+	stored, and displayed by "trace". A new max trace will only be
+	recorded if the latency is greater than the value in this file
+	(in microseconds).
+
+	By echoing in a time into this file, no latency will be recorded
+	unless it is greater than the time in this file.
 
   tracing_thresh:
 
@@ -152,32 +181,34 @@
 	that the kernel uses for allocation, usually 4 KB in size).
 	If the last page allocated has room for more bytes
 	than requested, the rest of the page will be used,
-	making the actual allocation bigger than requested.
+	making the actual allocation bigger than requested or shown.
 	( Note, the size may not be a multiple of the page size
 	  due to buffer management meta-data. )
 
+	Buffer sizes for individual CPUs may vary
+	(see "per_cpu/cpu0/buffer_size_kb" below), and if they do
+	this file will show "X".
+
   buffer_total_size_kb:
 
 	This displays the total combined size of all the trace buffers.
 
   free_buffer:
 
-	If a process is performing the tracing, and the ring buffer
-	should be shrunk "freed" when the process is finished, even
-	if it were to be killed by a signal, this file can be used
-	for that purpose. On close of this file, the ring buffer will
-	be resized to its minimum size. Having a process that is tracing
-	also open this file, when the process exits its file descriptor
-	for this file will be closed, and in doing so, the ring buffer
-	will be "freed".
+	If a process is performing tracing, and the ring buffer should be
+	shrunk "freed" when the process is finished, even if it were to be
+	killed by a signal, this file can be used for that purpose. On close
+	of this file, the ring buffer will be resized to its minimum size.
+	Having a process that is tracing also open this file, when the process
+	exits its file descriptor for this file will be closed, and in doing so,
+	the ring buffer will be "freed".
 
 	It may also stop tracing if disable_on_free option is set.
 
   tracing_cpumask:
 
-	This is a mask that lets the user only trace
-	on specified CPUs. The format is a hex string
-	representing the CPUs.
+	This is a mask that lets the user only trace on specified CPUs.
+	The format is a hex string representing the CPUs.
 
   set_ftrace_filter:
 
@@ -190,6 +221,9 @@
 	to be traced. Echoing names of functions into this file
 	will limit the trace to only those functions.
 
+	The functions listed in "available_filter_functions" are what
+	can be written into this file.
+
 	This interface also allows for commands to be used. See the
 	"Filter commands" section for more details.
 
@@ -202,7 +236,14 @@
 
   set_ftrace_pid:
 
-	Have the function tracer only trace a single thread.
+	Have the function tracer only trace the threads whose PIDs are
+	listed in this file.
+
+	If the "function-fork" option is set, then when a task whose
+	PID is listed in this file forks, the child's PID will
+	automatically be added to this file, and the child will be
+	traced by the function tracer as well. This option will also
+	cause PIDs of tasks that exit to be removed from the file.
 
   set_event_pid:
 
@@ -217,17 +258,28 @@
 
   set_graph_function:
 
-	Set a "trigger" function where tracing should start
-	with the function graph tracer (See the section
-	"dynamic ftrace" for more details).
+	Functions listed in this file will cause the function graph
+	tracer to only trace these functions and the functions that
+	they call. (See the section "dynamic ftrace" for more details).
+
+  set_graph_notrace:
+
+	Similar to set_graph_function, but will disable function graph
+	tracing when the function is hit until it exits the function.
+	This makes it possible to ignore tracing functions that are called
+	by a specific function.
 
   available_filter_functions:
 
-	This lists the functions that ftrace
-	has processed and can trace. These are the function
-	names that you can pass to "set_ftrace_filter" or
-	"set_ftrace_notrace". (See the section "dynamic ftrace"
-	below for more details.)
+	This lists the functions that ftrace has processed and can trace.
+	These are the function names that you can pass to
+	"set_ftrace_filter" or "set_ftrace_notrace".
+	(See the section "dynamic ftrace" below for more details.)
+
+  dyn_ftrace_total_info:
+
+	This file is for debugging purposes. It shows the number of functions
+	that have been converted to nops and are available to be traced.
 
   enabled_functions:
 
@@ -250,12 +302,21 @@
 	an 'I' will be displayed on the same line as the function that
 	can be overridden.
 
+	If the architecture supports it, it will also show what callback
+	is being directly called by the function. If the count is greater
+	than 1 it most likely will be ftrace_ops_list_func().
+
+	If the callback of the function jumps to a trampoline that is
+	specific to the callback and not the standard trampoline,
+	its address will be printed as well as the function that the
+	trampoline calls.
+
   function_profile_enabled:
 
 	When set it will enable all functions with either the function
-	tracer, or if enabled, the function graph tracer. It will
+	tracer, or if configured, the function graph tracer. It will
 	keep a histogram of the number of functions that were called
-	and if run with the function graph tracer, it will also keep
+	and if the function graph tracer was configured, it will also keep
 	track of the time spent in those functions. The histogram
 	content can be displayed in the files:
 
@@ -283,12 +344,11 @@
   printk_formats:
 
 	This is for tools that read the raw format files. If an event in
-	the ring buffer references a string (currently only trace_printk()
-	does this), only a pointer to the string is recorded into the buffer
-	and not the string itself. This prevents tools from knowing what
-	that string was. This file displays the string and address for
-	the string allowing tools to map the pointers to what the
-	strings were.
+	the ring buffer references a string, only a pointer to the string
+	is recorded into the buffer and not the string itself. This prevents
+	tools from knowing what that string was. This file displays the string
+	and address for the string allowing tools to map the pointers to what
+	the strings were.
 
   saved_cmdlines:
 
@@ -298,6 +358,22 @@
 	comms for events. If a pid for a comm is not listed, then
 	"<...>" is displayed in the output.
 
+	If the option "record-cmd" is set to "0", then comms of tasks
+	will not be saved during recording. By default, it is enabled.
+
+  saved_cmdlines_size:
+
+	By default, 128 comms are saved (see "saved_cmdlines" above). To
+	increase or decrease the amount of comms that are cached, echo
+	the number of comms to cache into this file.
+
+  saved_tgids:
+
+	If the option "record-tgid" is set, on each scheduling context switch
+	the Task Group ID of a task is saved in a table mapping the PID of
+	the thread to its TGID. By default, the "record-tgid" option is
+	disabled.
+
   snapshot:
 
 	This displays the "snapshot" buffer and also lets the user
@@ -336,6 +412,9 @@
 	  # cat trace_clock
 	  [local] global counter x86-tsc
 
+	  The clock with the square brackets around it is the one
+	  in effect.
+
 	  local: Default clock, but may not be in sync across CPUs
 
 	  global: This clock is in sync with all CPUs but may
@@ -448,6 +527,23 @@
 
 	See events.txt for more information.
 
+  set_event:
+
+	Echoing an event name into this file will enable that event.
+
+	See events.txt for more information.
+
+  available_events:
+
+	A list of events that can be enabled in tracing.
+
+	See events.txt for more information.
+
+  hwlat_detector:
+
+	Directory for the Hardware Latency Detector.
+	See "Hardware Latency Detector" section below.
+
   per_cpu:
 
 	This is a directory that contains the trace per_cpu information.
@@ -539,13 +635,25 @@
 	to draw a graph of function calls similar to C code
 	source.
 
+  "blk"
+
+	The block tracer. The tracer used by the blktrace user
+	application.
+
+  "hwlat"
+
+	The Hardware Latency tracer is used to detect if the hardware
+	produces any latency. See "Hardware Latency Detector" section
+	below.
+
   "irqsoff"
 
 	Traces the areas that disable interrupts and saves
 	the trace with the longest max latency.
 	See tracing_max_latency. When a new max is recorded,
 	it replaces the old trace. It is best to view this
-	trace with the latency-format option enabled.
+	trace with the latency-format option enabled, which
+	happens automatically when the tracer is selected.
 
   "preemptoff"
 
@@ -571,6 +679,26 @@
         RT tasks (as the current "wakeup" does). This is useful
         for those interested in wake up timings of RT tasks.
 
+  "wakeup_dl"
+
+	Traces and records the max latency that it takes for
+	a SCHED_DEADLINE task to be woken (as the "wakeup" and
+	"wakeup_rt" do).
+
+  "mmiotrace"
+
+	A special tracer that is used to trace binary modules.
+	It will trace all the calls that a module makes to the
+	hardware. Everything it writes and reads from the I/O
+	as well.
+
+  "branch"
+
+	This tracer can be configured when tracing likely/unlikely
+	calls within the kernel. It will trace when a likely and
+	unlikely branch is hit and whether its prediction was
+	correct.
+
   "nop"
 
 	This is the "trace nothing" tracer. To remove all
@@ -582,7 +710,7 @@
 ----------------------------
 
 Here are typical examples of using the tracers when controlling
-them only with the debugfs interface (without using any
+them only with the tracefs interface (without using any
 user-land utilities).
 
 Output format:
@@ -674,7 +802,7 @@
 This shows that the current tracer is "irqsoff" tracing the time
 for which interrupts were disabled. It gives the trace version (which
 never changes) and the version of the kernel upon which this was executed on
-(3.10). Then it displays the max latency in microseconds (259 us). The number
+(3.8). Then it displays the max latency in microseconds (259 us). The number
 of trace entries displayed and the total number (both are four: #4/4).
 VP, KP, SP, and HP are always zero and are reserved for later use.
 #P is the number of online CPUs (#P:4).
@@ -709,6 +837,8 @@
 	'.' otherwise.
 
   hardirq/softirq:
+	'Z' - NMI occurred inside a hardirq
+	'z' - NMI is running
 	'H' - hard irq occurred inside a softirq.
 	'h' - hard irq is running
 	's' - soft irq is running
@@ -757,24 +887,24 @@
 nobin
 noblock
 trace_printk
-nobranch
 annotate
 nouserstacktrace
 nosym-userobj
 noprintk-msg-only
 context-info
 nolatency-format
-sleep-time
-graph-time
 record-cmd
+norecord-tgid
 overwrite
 nodisable_on_free
 irq-info
 markers
 noevent-fork
 function-trace
+nofunction-fork
 nodisplay-graph
 nostacktrace
+nobranch
 
 To disable one of the options, echo in the option prepended with
 "no".
@@ -830,8 +960,6 @@
 
   trace_printk - Can disable trace_printk() from writing into the buffer.
 
-  branch - Enable branch tracing with the tracer.
-
   annotate - It is sometimes confusing when the CPU buffers are full
   	     and one CPU buffer had a lot of events recently, thus
 	     a shorter time frame, were another CPU may have only had
@@ -850,7 +978,8 @@
           <idle>-0     [001] .Ns3 21169.031485: sub_preempt_count <-_raw_spin_unlock
 
   userstacktrace - This option changes the trace. It records a
-		   stacktrace of the current userspace thread.
+		   stacktrace of the current user space thread after
+		   each trace event.
 
   sym-userobj - when user stacktrace are enabled, look up which
 		object the address belongs to, and print a
@@ -873,29 +1002,21 @@
   context-info - Show only the event data. Hides the comm, PID,
   	         timestamp, CPU, and other useful data.
 
-  latency-format - This option changes the trace. When
-                   it is enabled, the trace displays
-                   additional information about the
-                   latencies, as described in "Latency
-                   trace format".
-
-  sleep-time - When running function graph tracer, to include
-  	       the time a task schedules out in its function.
-	       When enabled, it will account time the task has been
-	       scheduled out as part of the function call.
-
-  graph-time - When running function profiler with function graph tracer,
-	       to include the time to call nested functions. When this is
-	       not set, the time reported for the function will only
-	       include the time the function itself executed for, not the
-	       time for functions that it called.
+  latency-format - This option changes the trace output. When it is enabled,
+		   the trace displays additional information about the
+		   latency, as described in "Latency trace format".
 
   record-cmd - When any event or tracer is enabled, a hook is enabled
-  	       in the sched_switch trace point to fill comm cache
+	       in the sched_switch trace point to fill comm cache
 	       with mapped pids and comms. But this may cause some
 	       overhead, and if you only care about pids, and not the
 	       name of the task, disabling this option can lower the
-	       impact of tracing.
+	       impact of tracing. See "saved_cmdlines".
+
+  record-tgid - When any event or tracer is enabled, a hook is enabled
+	        in the sched_switch trace point to fill the cache of
+		mapped Thread Group IDs (TGID) mapping to pids. See
+		"saved_tgids".
 
   overwrite - This controls what happens when the trace buffer is
               full. If "1" (default), the oldest events are
@@ -935,19 +1056,98 @@
 	    functions. This keeps the overhead of the tracer down
 	    when performing latency tests.
 
+  function-fork - When set, tasks with PIDs listed in set_ftrace_pid will
+		  have the PIDs of their children added to set_ftrace_pid
+		  when those tasks fork. Also, when tasks with PIDs in
+		  set_ftrace_pid exit, their PIDs will be removed from the
+		  file.
+
   display-graph - When set, the latency tracers (irqsoff, wakeup, etc) will
 	          use function graph tracing instead of function tracing.
 
-  stacktrace - This is one of the options that changes the trace
-	       itself. When a trace is recorded, so is the stack
-	       of functions. This allows for back traces of
-	       trace sites.
+  stacktrace - When set, a stack trace is recorded after any trace event
+	       is recorded.
+
+  branch - Enable branch tracing with the tracer. This enables branch
+	   tracer along with the currently set tracer. Enabling this
+	   with the "nop" tracer is the same as just enabling the
+	   "branch" tracer.
 
  Note: Some tracers have their own options. They only appear in this
        file when the tracer is active. They always appear in the
        options directory.
 
 
+Here are the per tracer options:
+
+Options for function tracer:
+
+  func_stack_trace - When set, a stack trace is recorded after every
+		     function that is recorded. NOTE! Limit the functions
+		     that are recorded before enabling this, with
+		     "set_ftrace_filter" otherwise the system performance
+		     will be critically degraded. Remember to disable
+		     this option before clearing the function filter.
+
+Options for function_graph tracer:
+
+ Since the function_graph tracer has a slightly different output
+ it has its own options to control what is displayed.
+
+  funcgraph-overrun - When set, the "overrun" of the graph stack is
+		      displayed after each function traced. The
+		      overrun is when the stack depth of the calls
+		      is greater than what is reserved for each task.
+		      Each task has a fixed array of functions to
+		      trace in the call graph. If the depth of the
+		      calls exceeds that, the function is not traced.
+		      The overrun is the number of functions missed
+		      due to exceeding this array.
+
+  funcgraph-cpu - When set, the CPU number of the CPU where the trace
+		  occurred is displayed.
+
+  funcgraph-overhead - When set, if the function takes longer than
+		       a certain amount, then a delay marker is
+		       displayed. See "delay" above, under the
+		       header description.
+
+  funcgraph-proc - Unlike other tracers, the process' command line
+		   is not displayed by default, but instead only
+		   when a task is traced in and out during a context
+		   switch. Enabling this option has the command
+		   of each process displayed at every line.
+
+  funcgraph-duration - At the end of each function (the return)
+		       the duration of the amount of time in the
+		       function is displayed in microseconds.
+
+  funcgraph-abstime - When set, the timestamp is displayed at each
+		      line.
+
+  funcgraph-irqs - When disabled, functions that happen inside an
+		   interrupt will not be traced.
+
+  funcgraph-tail - When set, the return event will include the function
+		   that it represents. By default this is off, and
+		   only a closing curly bracket "}" is displayed for
+		   the return of a function.
+
+  sleep-time - When running function graph tracer, to include
+	       the time a task schedules out in its function.
+	       When enabled, it will account time the task has been
+	       scheduled out as part of the function call.
+
+  graph-time - When running function profiler with function graph tracer,
+	       to include the time to call nested functions. When this is
+	       not set, the time reported for the function will only
+	       include the time the function itself executed for, not the
+	       time for functions that it called.
+
+Options for blk tracer:
+
+  blk_classic - Shows a more minimalistic output.
+
 
 irqsoff
 -------
@@ -1711,6 +1911,85 @@
   <idle>-0       2d..3    6us :      0:120:R ==> [002]  5882: 94:R sleep
 
 
+Hardware Latency Detector
+-------------------------
+
+The hardware latency detector is executed by enabling the "hwlat" tracer.
+
+NOTE, this tracer will affect the performance of the system as it will
+periodically make a CPU constantly busy with interrupts disabled.
+
+ # echo hwlat > current_tracer
+ # sleep 100
+ # cat trace
+# tracer: hwlat
+#
+#                              _-----=> irqs-off
+#                             / _----=> need-resched
+#                            | / _---=> hardirq/softirq
+#                            || / _--=> preempt-depth
+#                            ||| /     delay
+#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
+#              | |       |   ||||       |         |
+           <...>-3638  [001] d... 19452.055471: #1     inner/outer(us):   12/14    ts:1499801089.066141940
+           <...>-3638  [003] d... 19454.071354: #2     inner/outer(us):   11/9     ts:1499801091.082164365
+           <...>-3638  [002] dn.. 19461.126852: #3     inner/outer(us):   12/9     ts:1499801098.138150062
+           <...>-3638  [001] d... 19488.340960: #4     inner/outer(us):    8/12    ts:1499801125.354139633
+           <...>-3638  [003] d... 19494.388553: #5     inner/outer(us):    8/12    ts:1499801131.402150961
+           <...>-3638  [003] d... 19501.283419: #6     inner/outer(us):    0/12    ts:1499801138.297435289 nmi-total:4 nmi-count:1
+
+
+The above output is somewhat the same in the header. All events will have
+interrupts disabled 'd'. Under the FUNCTION title there is:
+
+ #1 - This is the count of events recorded that were greater than the
+      tracing_threshold (See below).
+
+ inner/outer(us):   12/14
+
+      This shows two numbers as "inner latency" and "outer latency". The test
+      runs in a loop checking a timestamp twice. The latency detected within
+      the two timestamps is the "inner latency" and the latency detected
+      after the previous timestamp and the next timestamp in the loop is
+      the "outer latency".
+
+ ts:1499801089.066141940
+
+      The absolute timestamp that the event happened.
+
+ nmi-total:4 nmi-count:1
+
+      On architectures that support it, if an NMI comes in during the
+      test, the time spent in NMI is reported in "nmi-total" (in
+      microseconds).
+
+      All architectures that have NMIs will show the "nmi-count" if an
+      NMI comes in during the test.
+
+hwlat files:
+
+  tracing_threshold - This gets automatically set to "10" to represent 10
+		      microseconds. This is the threshold of latency that
+		      needs to be detected before the trace will be recorded.
+
+		      Note, when hwlat tracer is finished (another tracer is
+		      written into "current_tracer"), the original value for
+		      tracing_threshold is placed back into this file.
+
+  hwlat_detector/width - The length of time the test runs with interrupts
+			 disabled.
+
+  hwlat_detector/window - The length of time of the window which the test
+			  runs. That is, the test will run for "width"
+			  microseconds per "window" microseconds.
+
+  tracing_cpumask - When the test is started, a kernel thread is created that
+		    runs the test. This thread will alternate between CPUs
+		    listed in the tracing_cpumask between each period
+		    (one "window"). To limit the test to specific CPUs
+		    set the mask in this file to only the CPUs that the test
+		    should run on.
+
 function
 --------
 
@@ -1821,15 +2100,15 @@
 #define STR(x) _STR(x)
 #define MAX_PATH 256
 
-const char *find_debugfs(void)
+const char *find_tracefs(void)
 {
-       static char debugfs[MAX_PATH+1];
-       static int debugfs_found;
+       static char tracefs[MAX_PATH+1];
+       static int tracefs_found;
        char type[100];
        FILE *fp;
 
-       if (debugfs_found)
-               return debugfs;
+       if (tracefs_found)
+               return tracefs;
 
        if ((fp = fopen("/proc/mounts","r")) == NULL) {
                perror("/proc/mounts");
@@ -1839,27 +2118,27 @@
        while (fscanf(fp, "%*s %"
                      STR(MAX_PATH)
                      "s %99s %*s %*d %*d\n",
-                     debugfs, type) == 2) {
-               if (strcmp(type, "debugfs") == 0)
+                     tracefs, type) == 2) {
+               if (strcmp(type, "tracefs") == 0)
                        break;
        }
        fclose(fp);
 
-       if (strcmp(type, "debugfs") != 0) {
-               fprintf(stderr, "debugfs not mounted");
+       if (strcmp(type, "tracefs") != 0) {
+               fprintf(stderr, "tracefs not mounted");
                return NULL;
        }
 
-       strcat(debugfs, "/tracing/");
-       debugfs_found = 1;
+       strcat(tracefs, "/tracing/");
+       tracefs_found = 1;
 
-       return debugfs;
+       return tracefs;
 }
 
 const char *tracing_file(const char *file_name)
 {
        static char trace_file[MAX_PATH+1];
-       snprintf(trace_file, MAX_PATH, "%s/%s", find_debugfs(), file_name);
+       snprintf(trace_file, MAX_PATH, "%s/%s", find_tracefs(), file_name);
        return trace_file;
 }
 
@@ -1898,12 +2177,12 @@
 ------
 #!/bin/bash
 
-debugfs=`sed -ne 's/^debugfs \(.*\) debugfs.*/\1/p' /proc/mounts`
-echo nop > $debugfs/tracing/current_tracer
-echo 0 > $debugfs/tracing/tracing_on
-echo $$ > $debugfs/tracing/set_ftrace_pid
-echo function > $debugfs/tracing/current_tracer
-echo 1 > $debugfs/tracing/tracing_on
+tracefs=`sed -ne 's/^tracefs \(.*\) tracefs.*/\1/p' /proc/mounts`
+echo nop > $tracefs/tracing/current_tracer
+echo 0 > $tracefs/tracing/tracing_on
+echo $$ > $tracefs/tracing/set_ftrace_pid
+echo function > $tracefs/tracing/current_tracer
+echo 1 > $tracefs/tracing/tracing_on
 exec "$@"
 ------
 
@@ -2145,13 +2424,18 @@
 At compile time every C file object is run through the
 recordmcount program (located in the scripts directory). This
 program will parse the ELF headers in the C object to find all
-the locations in the .text section that call mcount. (Note, only
-white listed .text sections are processed, since processing other
-sections like .init.text may cause races due to those sections
-being freed unexpectedly).
+the locations in the .text section that call mcount. Starting
+with gcc version 4.6, the -mfentry option has been added for x86, which
+calls "__fentry__" instead of "mcount". "__fentry__" is called before
+the creation of the stack frame.
 
-A new section called "__mcount_loc" is created that holds
-references to all the mcount call sites in the .text section.
+Note, not all sections are traced. They may be prevented by either
+a notrace, or blocked another way and all inline functions are not
+traced. Check the "available_filter_functions" file to see what functions
+can be traced.
+
+A section called "__mcount_loc" is created that holds
+references to all the mcount/fentry call sites in the .text section.
 The recordmcount program re-links this section back into the
 original object. The final linking stage of the kernel will add all these
 references into a single table.
@@ -2679,7 +2963,7 @@
 buffer with a spare buffer, and tracing continues in the new
 current (=previous spare) buffer.
 
-The following debugfs files in "tracing" are related to this
+The following tracefs files in "tracing" are related to this
 feature:
 
   snapshot:
@@ -2752,7 +3036,7 @@
 
 Instances
 ---------
-In the debugfs tracing directory is a directory called "instances".
+In the tracefs tracing directory is a directory called "instances".
 This directory can have new directories created inside of it using
 mkdir, and removing directories with rmdir. The directory created
 with mkdir in this directory will already contain files and other

diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index c6f4ead..38310dc 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt

@@ -523,11 +523,11 @@
      즉, ACQUIRE 는 최소한의 "취득" 동작처럼, 그리고 RELEASE 는 최소한의 "공개"
      처럼 동작한다는 의미입니다.
 
-atomic_ops.txt 에서 설명되는 어토믹 오퍼레이션들 중에는 완전히 순서잡힌 것들과
-(배리어를 사용하지 않는) 완화된 순서의 것들 외에 ACQUIRE 와 RELEASE 부류의
-것들도 존재합니다.  로드와 스토어를 모두 수행하는 조합된 어토믹 오퍼레이션에서,
-ACQUIRE 는 해당 오퍼레이션의 로드 부분에만 적용되고 RELEASE 는 해당
-오퍼레이션의 스토어 부분에만 적용됩니다.
+core-api/atomic_ops.rst 에서 설명되는 어토믹 오퍼레이션들 중에는 완전히
+순서잡힌 것들과 (배리어를 사용하지 않는) 완화된 순서의 것들 외에 ACQUIRE 와
+RELEASE 부류의 것들도 존재합니다.  로드와 스토어를 모두 수행하는 조합된 어토믹
+오퍼레이션에서, ACQUIRE 는 해당 오퍼레이션의 로드 부분에만 적용되고 RELEASE 는
+해당 오퍼레이션의 스토어 부분에만 적용됩니다.
 
 메모리 배리어들은 두 CPU 간, 또는 CPU 와 디바이스 간에 상호작용의 가능성이 있을
 때에만 필요합니다.  만약 어떤 코드에 그런 상호작용이 없을 것이 보장된다면, 해당
@@ -1848,7 +1848,7 @@
      이 코드는 객체의 업데이트된 death 마크가 레퍼런스 카운터 감소 동작
      *전에* 보일 것을 보장합니다.
 
-     더 많은 정보를 위해선 Documentation/atomic_ops.txt 문서를 참고하세요.
+     더 많은 정보를 위해선 Documentation/core-api/atomic_ops.rst 문서를 참고하세요.
      어디서 이것들을 사용해야 할지 궁금하다면 "어토믹 오퍼레이션" 서브섹션을
      참고하세요.
 
@@ -2550,7 +2550,7 @@
 있는데, 그런 경우에 이 특수 메모리 배리어 도구들은 no-op 이 되어 실질적으로
 아무일도 하지 않습니다.
 
-더 많은 내용을 위해선 Documentation/atomic_ops.txt 를 참고하세요.
+더 많은 내용을 위해선 Documentation/core-api/atomic_ops.rst 를 참고하세요.
 
 
 디바이스 액세스

diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt
index 914518a..b352636 100644
--- a/Documentation/watchdog/watchdog-parameters.txt
+++ b/Documentation/watchdog/watchdog-parameters.txt

@@ -369,6 +369,12 @@
 nowayout: Watchdog cannot be stopped once started
 	(default=kernel config parameter)
 -------------------------------------------------
+uniphier_wdt:
+timeout: Watchdog timeout in power of two seconds.
+	(1 <= timeout <= 128, default=64)
+nowayout: Watchdog cannot be stopped once started
+	(default=kernel config parameter)
+-------------------------------------------------
 w83627hf_wdt:
 wdt_io: w83627hf/thf WDT io port (default 0x2E)
 timeout: Watchdog timeout in seconds. 1 <= timeout <= 255, default=60.

diff --git a/MAINTAINERS b/MAINTAINERS
index 7ad8107..4bae99c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS

@@ -1139,6 +1139,18 @@
 F:	arch/arm/boot/dts/aspeed-*
 F:	drivers/*/*aspeed*
 
+ARM/ASPEED I2C DRIVER
+M:	Brendan Higgins <brendanhiggins@google.com>
+R:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
+R:	Joel Stanley <joel@jms.id.au>
+L:	linux-i2c@vger.kernel.org
+L:	openbmc@lists.ozlabs.org
+S:	Maintained
+F:	drivers/irqchip/irq-aspeed-i2c-ic.c
+F:	drivers/i2c/busses/i2c-aspeed.c
+F:	Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-i2c-ic.txt
+F:	Documentation/devicetree/bindings/i2c/i2c-aspeed.txt
+
 ARM/ATMEL AT91RM9200, AT91SAM9 AND SAMA5 SOC SUPPORT
 M:	Nicolas Ferre <nicolas.ferre@microchip.com>
 M:	Alexandre Belloni <alexandre.belloni@free-electrons.com>
@@ -1243,7 +1255,7 @@
 T:	git git://github.com/ulli-kroll/linux.git
 S:	Maintained
 F:	arch/arm/mach-gemini/
-F:	drivers/rtc/rtc-gemini.c
+F:	drivers/rtc/rtc-ftrtc010.c
 
 ARM/CSR SIRFPRIMA2 MACHINE SUPPORT
 M:	Barry Song <baohua@kernel.org>
@@ -3319,9 +3331,10 @@
 F:	drivers/input/touchscreen/chipone_icn8318.c
 
 CHROME HARDWARE PLATFORM SUPPORT
+M:	Benson Leung <bleung@chromium.org>
 M:	Olof Johansson <olof@lixom.net>
 S:	Maintained
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/olof/chrome-platform.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bleung/chrome-platform.git
 F:	drivers/platform/chrome/
 
 CISCO VIC ETHERNET NIC DRIVER
@@ -3961,6 +3974,12 @@
 S:	Maintained
 F:	drivers/platform/x86/dell-wmi.c
 
+DENALI NAND DRIVER
+M:	Masahiro Yamada <yamada.masahiro@socionext.com>
+L:	linux-mtd@lists.infradead.org
+S:	Supported
+F:	drivers/mtd/nand/denali*
+
 DESIGNWARE USB2 DRD IP DRIVER
 M:	John Youn <johnyoun@synopsys.com>
 L:	linux-usb@vger.kernel.org
@@ -6381,6 +6400,7 @@
 L:	linux-i2c@vger.kernel.org
 L:	linux-acpi@vger.kernel.org
 S:	Maintained
+F:	drivers/i2c/i2c-core-acpi.c
 
 I2C-TAOS-EVM DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
@@ -9361,6 +9381,12 @@
 F:	include/linux/ntb_transport.h
 F:	tools/testing/selftests/ntb/
 
+NTB IDT DRIVER
+M:	Serge Semin <fancer.lancer@gmail.com>
+L:	linux-ntb@googlegroups.com
+S:	Supported
+F:	drivers/ntb/hw/idt/
+
 NTB INTEL DRIVER
 M:	Jon Mason <jdmason@kudzu.us>
 M:	Dave Jiang <dave.jiang@intel.com>
@@ -10559,6 +10585,17 @@
 S:	Obsolete
 F:	drivers/net/wireless/intersil/prism54/
 
+PROC SYSCTL
+M:	"Luis R. Rodriguez" <mcgrof@kernel.org>
+M:	Kees Cook <keescook@chromium.org>
+L:	linux-kernel@vger.kernel.org
+L:	linux-fsdevel@vger.kernel.org
+S:	Maintained
+F:	fs/proc/proc_sysctl.c
+F:	include/linux/sysctl.h
+F:	kernel/sysctl.c
+F:	tools/testing/selftests/sysctl/
+
 PS3 NETWORK SUPPORT
 M:	Geoff Levand <geoff@infradead.org>
 L:	netdev@vger.kernel.org
@@ -12439,7 +12476,8 @@
 L:	linux-mtd@lists.infradead.org
 W:	http://www.linux-mtd.infradead.org/
 Q:	http://patchwork.ozlabs.org/project/linux-mtd/list/
-T:	git git://github.com/spi-nor/linux.git
+T:	git git://git.infradead.org/linux-mtd.git spi-nor/fixes
+T:	git git://git.infradead.org/l2-mtd.git spi-nor/next
 S:	Maintained
 F:	drivers/mtd/spi-nor/
 F:	include/linux/mtd/spi-nor.h

diff --git a/Makefile b/Makefile
index 06ef994..44f5bfd 100644
--- a/Makefile
+++ b/Makefile

@@ -294,10 +294,17 @@
 	  else if [ -x /bin/bash ]; then echo /bin/bash; \
 	  else echo sh; fi ; fi)
 
+HOST_LFS_CFLAGS := $(shell getconf LFS_CFLAGS)
+HOST_LFS_LDFLAGS := $(shell getconf LFS_LDFLAGS)
+HOST_LFS_LIBS := $(shell getconf LFS_LIBS)
+
 HOSTCC       = gcc
 HOSTCXX      = g++
-HOSTCFLAGS   := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89
-HOSTCXXFLAGS = -O2
+HOSTCFLAGS   := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 \
+		-fomit-frame-pointer -std=gnu89 $(HOST_LFS_CFLAGS)
+HOSTCXXFLAGS := -O2 $(HOST_LFS_CFLAGS)
+HOSTLDFLAGS  := $(HOST_LFS_LDFLAGS)
+HOST_LOADLIBES := $(HOST_LFS_LIBS)
 
 ifeq ($(shell $(HOSTCC) -v 2>&1 | grep -c "clang version"), 1)
 HOSTCFLAGS  += -Wno-unused-value -Wno-unused-parameter \
@@ -408,7 +415,7 @@
 
 export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION
 export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC
-export CPP AR NM STRIP OBJCOPY OBJDUMP
+export CPP AR NM STRIP OBJCOPY OBJDUMP HOSTLDFLAGS HOST_LOADLIBES
 export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE
 export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
 
@@ -456,10 +463,11 @@
 endif
 
 # Support for using generic headers in asm-generic
-PHONY += asm-generic
-asm-generic:
+PHONY += asm-generic uapi-asm-generic
+asm-generic: uapi-asm-generic
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-generic \
 	            src=asm obj=arch/$(SRCARCH)/include/generated/asm
+uapi-asm-generic:
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-generic \
 	            src=uapi/asm obj=arch/$(SRCARCH)/include/generated/uapi/asm
 
@@ -622,6 +630,9 @@
 
 KBUILD_CFLAGS	+= $(call cc-option,-fno-delete-null-pointer-checks,)
 KBUILD_CFLAGS	+= $(call cc-disable-warning,frame-address,)
+KBUILD_CFLAGS	+= $(call cc-disable-warning, format-truncation)
+KBUILD_CFLAGS	+= $(call cc-disable-warning, format-overflow)
+KBUILD_CFLAGS	+= $(call cc-disable-warning, int-in-bool-context)
 
 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 KBUILD_CFLAGS	+= $(call cc-option,-Oz,-Os)
@@ -1133,7 +1144,7 @@
 #Default location for installed headers
 export INSTALL_HDR_PATH = $(objtree)/usr
 
-# If we do an all arch process set dst to asm-$(hdr-arch)
+# If we do an all arch process set dst to include/arch-$(hdr-arch)
 hdr-dst = $(if $(KBUILD_HEADERS), dst=include/arch-$(hdr-arch), dst=include)
 
 PHONY += archheaders
@@ -1143,7 +1154,7 @@
 archscripts:
 
 PHONY += __headers
-__headers: $(version_h) scripts_basic asm-generic archheaders archscripts
+__headers: $(version_h) scripts_basic uapi-asm-generic archheaders archscripts
 	$(Q)$(MAKE) $(build)=scripts build_unifdef
 
 PHONY += headers_install_all
@@ -1154,7 +1165,7 @@
 headers_install: __headers
 	$(if $(wildcard $(srctree)/arch/$(hdr-arch)/include/uapi/asm/Kbuild),, \
 	  $(error Headers not exportable for the $(SRCARCH) architecture))
-	$(Q)$(MAKE) $(hdr-inst)=include/uapi
+	$(Q)$(MAKE) $(hdr-inst)=include/uapi dst=include
 	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi $(hdr-dst)
 
 PHONY += headers_check_all
@@ -1163,7 +1174,7 @@
 
 PHONY += headers_check
 headers_check: headers_install
-	$(Q)$(MAKE) $(hdr-inst)=include/uapi HDRCHECK=1
+	$(Q)$(MAKE) $(hdr-inst)=include/uapi dst=include HDRCHECK=1
 	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi $(hdr-dst) HDRCHECK=1
 
 # ---------------------------------------------------------------------------

diff --git a/arch/Kconfig b/arch/Kconfig
index cae0958..21d0089 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig

@@ -198,9 +198,6 @@
 config HAVE_NMI
 	bool
 
-config HAVE_NMI_WATCHDOG
-	depends on HAVE_NMI
-	bool
 #
 # An arch should select this if it provides all these things:
 #
@@ -226,6 +223,12 @@
 config GENERIC_IDLE_POLL_SETUP
        bool
 
+config ARCH_HAS_FORTIFY_SOURCE
+	bool
+	help
+	  An architecture should select this when it can successfully
+	  build and run with CONFIG_FORTIFY_SOURCE.
+
 # Select if arch has all set_memory_ro/rw/x/nx() functions in asm/cacheflush.h
 config ARCH_HAS_SET_MEMORY
 	bool
@@ -288,6 +291,28 @@
 	  subsystem.  Also has support for calculating CPU cycle events
 	  to determine how many clock cycles in a given period.
 
+config HAVE_HARDLOCKUP_DETECTOR_PERF
+	bool
+	depends on HAVE_PERF_EVENTS_NMI
+	help
+	  The arch chooses to use the generic perf-NMI-based hardlockup
+	  detector. Must define HAVE_PERF_EVENTS_NMI.
+
+config HAVE_NMI_WATCHDOG
+	depends on HAVE_NMI
+	bool
+	help
+	  The arch provides a low level NMI watchdog. It provides
+	  asm/nmi.h, and defines its own arch_touch_nmi_watchdog().
+
+config HAVE_HARDLOCKUP_DETECTOR_ARCH
+	bool
+	select HAVE_NMI_WATCHDOG
+	help
+	  The arch chooses to provide its own hardlockup detector, which is
+	  a superset of the HAVE_NMI_WATCHDOG. It also conforms to config
+	  interfaces and parameters provided by hardlockup detector subsystem.
+
 config HAVE_PERF_REGS
 	bool
 	help

diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 3e74ca5..353dae3 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild

@@ -1,51 +1,27 @@
-generic-y += auxvec.h
-generic-y += bitsperlong.h
 generic-y += bugs.h
 generic-y += clkdev.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += ftrace.h
 generic-y += hardirq.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
-generic-y += msgbuf.h
 generic-y += msi.h
-generic-y += param.h
 generic-y += parport.h
 generic-y += pci.h
 generic-y += percpu.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
-generic-y += resource.h
-generic-y += sembuf.h
-generic-y += shmbuf.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += stat.h
-generic-y += statfs.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
-generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h

diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild
index b55fc2a..fa6d0ff 100644
--- a/arch/arc/include/uapi/asm/Kbuild
+++ b/arch/arc/include/uapi/asm/Kbuild

@@ -1,4 +1,28 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
 generic-y += siginfo.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h

diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c
index ea78327..f3a4bed 100644
--- a/arch/arm/boot/compressed/decompress.c
+++ b/arch/arm/boot/compressed/decompress.c

@@ -33,6 +33,7 @@ extern void error(char *);
 /* Not needed, but used in some headers pulled in by decompressors */
 extern char * strstr(const char * s1, const char *s2);
 extern size_t strlen(const char *s);
+extern int memcmp(const void *cs, const void *ct, size_t count);
 
 #ifdef CONFIG_KERNEL_GZIP
 #include "../../../../lib/decompress_inflate.c"

diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index d836050..721ab5e 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild

@@ -1,39 +1,23 @@
-
-
-generic-y += bitsperlong.h
 generic-y += clkdev.h
 generic-y += current.h
 generic-y += early_ioremap.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
-generic-y += ioctl.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += kdebug.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mm-arch-hooks.h
-generic-y += msgbuf.h
 generic-y += msi.h
-generic-y += param.h
 generic-y += parport.h
-generic-y += poll.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += rwsem.h
 generic-y += seccomp.h
 generic-y += segment.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += shmbuf.h
 generic-y += simd.h
 generic-y += sizes.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += timex.h
 generic-y += trace_clock.h
 generic-y += unaligned.h

diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index d2315ff..f13ae15 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h

@@ -112,12 +112,8 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs);
 #define CORE_DUMP_USE_REGSET
 #define ELF_EXEC_PAGESIZE	4096
 
-/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
-   use of this is to invoke "./ld.so someprog" to test out a new version of
-   the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk.  */
-
-#define ELF_ET_DYN_BASE	(TASK_SIZE / 3 * 2)
+/* This is the base location for PIE (ET_DYN with INTERP) loads. */
+#define ELF_ET_DYN_BASE		0x400000UL
 
 /* When the program starts, a1 contains a pointer to a function to be 
    registered with atexit, as per the SVR4 ABI.  A value of 0 means we 

diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index 5850890..14b5903 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h

@@ -110,8 +110,8 @@ void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
 void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
 void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
 
-void asmlinkage __vfp_save_state(struct vfp_hard_struct *vfp);
-void asmlinkage __vfp_restore_state(struct vfp_hard_struct *vfp);
+asmlinkage void __vfp_save_state(struct vfp_hard_struct *vfp);
+asmlinkage void __vfp_restore_state(struct vfp_hard_struct *vfp);
 static inline bool __vfp_enabled(void)
 {
 	return !(read_sysreg(HCPTR) & (HCPTR_TCP(11) | HCPTR_TCP(10)));
@@ -120,8 +120,8 @@ static inline bool __vfp_enabled(void)
 void __hyp_text __banked_save_state(struct kvm_cpu_context *ctxt);
 void __hyp_text __banked_restore_state(struct kvm_cpu_context *ctxt);
 
-int asmlinkage __guest_enter(struct kvm_vcpu *vcpu,
+asmlinkage int __guest_enter(struct kvm_vcpu *vcpu,
 			     struct kvm_cpu_context *host);
-int asmlinkage __hyp_do_panic(const char *, int, u32);
+asmlinkage int __hyp_do_panic(const char *, int, u32);
 
 #endif /* __ARM_KVM_HYP_H__ */

diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 5fb3368..8e17fe8 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild

@@ -5,4 +5,18 @@
 generated-y += unistd-oabi.h
 generated-y += unistd-eabi.h
 
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += ioctl.h
+generic-y += ipcbuf.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
 generic-y += siginfo.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += termbits.h
+generic-y += termios.h

diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c
index 68c6ae0..98fbfd2 100644
--- a/arch/arm/kernel/atags_parse.c
+++ b/arch/arm/kernel/atags_parse.c

@@ -18,6 +18,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/initrd.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/root_dev.h>
@@ -91,8 +92,6 @@ __tagtable(ATAG_VIDEOTEXT, parse_tag_videotext);
 #ifdef CONFIG_BLK_DEV_RAM
 static int __init parse_tag_ramdisk(const struct tag *tag)
 {
-	extern int rd_size, rd_image_start, rd_prompt, rd_doload;
-
 	rd_image_start = tag->u.ramdisk.start;
 	rd_doload = (tag->u.ramdisk.flags & 1) == 0;
 	rd_prompt = (tag->u.ramdisk.flags & 2) == 0;

diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 56dc1a3..c1809fb 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c

@@ -480,7 +480,7 @@ static void pcibios_init_hw(struct device *parent, struct hw_pci *hw,
 
 			ret = pcibios_init_resource(nr, sys, hw->io_optional);
 			if (ret)  {
-				kfree(sys);
+				pci_free_host_bridge(bridge);
 				break;
 			}
 

diff --git a/arch/arm/mach-sa1100/jornada720_ssp.c b/arch/arm/mach-sa1100/jornada720_ssp.c
index b143c46..7fc11a3 100644
--- a/arch/arm/mach-sa1100/jornada720_ssp.c
+++ b/arch/arm/mach-sa1100/jornada720_ssp.c

@@ -33,7 +33,7 @@ static unsigned long jornada_ssp_flags;
  * we need to reverse all data we receive from the mcu due to its physical location
  * returns : 01110111 -> 11101110
  */
-u8 inline jornada_ssp_reverse(u8 byte)
+inline u8 jornada_ssp_reverse(u8 byte)
 {
 	return
 		((0x80 & byte) >> 7) |

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8addb85..dfd90863 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig

@@ -12,6 +12,7 @@
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
 	select ARCH_HAS_ELF_RANDOMIZE
+	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
 	select ARCH_HAS_KCOV

diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index a7a97a6..f81c7b6 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild

@@ -6,41 +6,24 @@
 generic-y += dma-contiguous.h
 generic-y += early_ioremap.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
-generic-y += msgbuf.h
 generic-y += msi.h
-generic-y += poll.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += rwsem.h
 generic-y += segment.h
-generic-y += sembuf.h
 generic-y += serial.h
 generic-y += set_memory.h
-generic-y += shmbuf.h
 generic-y += simd.h
 generic-y += sizes.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += swab.h
 generic-y += switch_to.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += trace_clock.h
-generic-y += types.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h

diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index ac3fb74..acae781 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h

@@ -113,12 +113,11 @@
 #define ELF_EXEC_PAGESIZE	PAGE_SIZE
 
 /*
- * This is the location that an ET_DYN program is loaded if exec'ed.  Typical
- * use of this is to invoke "./ld.so someprog" to test out a new version of
- * the loader.  We need to make sure that it is out of the way of the program
- * that it will "exec", and that there is sufficient room for the brk.
+ * This is the base location for PIE (ET_DYN with INTERP) loads. On
+ * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * space open for things that want to use the area for 32-bit pointers.
  */
-#define ELF_ET_DYN_BASE	(2 * TASK_SIZE_64 / 3)
+#define ELF_ET_DYN_BASE		0x100000000UL
 
 #ifndef __ASSEMBLY__
 
@@ -174,7 +173,8 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
 
 #ifdef CONFIG_COMPAT
 
-#define COMPAT_ELF_ET_DYN_BASE		(2 * TASK_SIZE_32 / 3)
+/* PIE load location for compat arm. Must match ARM ELF_ET_DYN_BASE. */
+#define COMPAT_ELF_ET_DYN_BASE		0x000400000UL
 
 /* AArch32 registers. */
 #define COMPAT_ELF_NGREG		18

diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h
index fe5e287..b86a086 100644
--- a/arch/arm64/include/asm/stackprotector.h
+++ b/arch/arm64/include/asm/stackprotector.h

@@ -30,6 +30,7 @@ static __always_inline void boot_init_stack_canary(void)
 	/* Try to get a semi random initial value. */
 	get_random_bytes(&canary, sizeof(canary));
 	canary ^= LINUX_VERSION_CODE;
+	canary &= CANARY_MASK;
 
 	current->stack_canary = canary;
 	__stack_chk_guard = current->stack_canary;

diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
index 2eb714c..d0aa429 100644
--- a/arch/arm64/include/asm/string.h
+++ b/arch/arm64/include/asm/string.h

@@ -63,6 +63,11 @@ extern int memcmp(const void *, const void *, size_t);
 #define memcpy(dst, src, len) __memcpy(dst, src, len)
 #define memmove(dst, src, len) __memmove(dst, src, len)
 #define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
 #endif
 
 #endif

diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild
index 13a97aa..fc28bd9 100644
--- a/arch/arm64/include/uapi/asm/Kbuild
+++ b/arch/arm64/include/uapi/asm/Kbuild

@@ -1,4 +1,20 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += errno.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
 generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += swab.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 687a358..81f0395 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c

@@ -191,14 +191,8 @@ void __init kasan_init(void)
 		if (start >= end)
 			break;
 
-		/*
-		 * end + 1 here is intentional. We check several shadow bytes in
-		 * advance to slightly speed up fastpath. In some rare cases
-		 * we could cross boundary of mapped shadow, so we just map
-		 * some more here.
-		 */
 		vmemmap_populate((unsigned long)kasan_mem_to_shadow(start),
-				(unsigned long)kasan_mem_to_shadow(end) + 1,
+				(unsigned long)kasan_mem_to_shadow(end),
 				pfn_to_nid(virt_to_pfn(start)));
 	}
 

diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index adc208c..decccff 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c

@@ -35,7 +35,7 @@
  * Leave enough space between the mmap area and the stack to honour ulimit in
  * the face of randomisation.
  */
-#define MIN_GAP (SZ_128M + ((STACK_RND_MASK << PAGE_SHIFT) + 1))
+#define MIN_GAP (SZ_128M)
 #define MAX_GAP	(STACK_TOP/6*5)
 
 static int mmap_is_legacy(void)
@@ -65,6 +65,11 @@ unsigned long arch_mmap_rnd(void)
 static unsigned long mmap_base(unsigned long rnd)
 {
 	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
+
+	/* Values close to RLIM_INFINITY can overflow. */
+	if (gap + pad > gap)
+		gap += pad;
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;

diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild
index dc4ef9a..fe73697 100644
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild

@@ -1,50 +1,28 @@
-
-generic-y += auxvec.h
-generic-y += bitsperlong.h
 generic-y += bugs.h
 generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += extable.h
 generic-y += fb.h
 generic-y += futex.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += percpu.h
 generic-y += pgalloc.h
 generic-y += preempt.h
-generic-y += resource.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += setup.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += statfs.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
-generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h

diff --git a/arch/blackfin/include/asm/nmi.h b/arch/blackfin/include/asm/nmi.h
index b9caac4..107d237 100644
--- a/arch/blackfin/include/asm/nmi.h
+++ b/arch/blackfin/include/asm/nmi.h

@@ -9,4 +9,6 @@
 
 #include <linux/nmi.h>
 
+extern void arch_touch_nmi_watchdog(void);
+
 #endif

diff --git a/arch/blackfin/include/uapi/asm/Kbuild b/arch/blackfin/include/uapi/asm/Kbuild
index b15bf6b..aa624b4 100644
--- a/arch/blackfin/include/uapi/asm/Kbuild
+++ b/arch/blackfin/include/uapi/asm/Kbuild

@@ -1,2 +1,24 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += ioctl.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += setup.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h

diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c
index 633c370..1e71432 100644
--- a/arch/blackfin/kernel/nmi.c
+++ b/arch/blackfin/kernel/nmi.c

@@ -190,7 +190,7 @@ static int __init init_nmi_wdt(void)
 }
 device_initcall(init_nmi_wdt);
 
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
 {
 	atomic_set(&nmi_touched[smp_processor_id()], 1);
 }

diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index a3c8d05..d717329 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild

@@ -1,8 +1,5 @@
-
 generic-y += atomic.h
-generic-y += auxvec.h
 generic-y += barrier.h
-generic-y += bitsperlong.h
 generic-y += bugs.h
 generic-y += clkdev.h
 generic-y += current.h
@@ -10,55 +7,32 @@
 generic-y += div64.h
 generic-y += dma.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += futex.h
 generic-y += hw_irq.h
 generic-y += io.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
 generic-y += mmu.h
 generic-y += mmu_context.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += pci.h
 generic-y += percpu.h
 generic-y += pgalloc.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += segment.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
-generic-y += signal.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += stat.h
-generic-y += statfs.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += tlbflush.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
-generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h

diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild
index 1c44d3b..67ee896 100644
--- a/arch/c6x/include/uapi/asm/Kbuild
+++ b/arch/c6x/include/uapi/asm/Kbuild

@@ -1,5 +1,30 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
 generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
 generic-y += siginfo.h
+generic-y += signal.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h

diff --git a/arch/cris/arch-v10/drivers/gpio.c b/arch/cris/arch-v10/drivers/gpio.c
index 64285e0..dfd3b3b 100644
--- a/arch/cris/arch-v10/drivers/gpio.c
+++ b/arch/cris/arch-v10/drivers/gpio.c

@@ -399,7 +399,7 @@ gpio_release(struct inode *inode, struct file *filp)
 /* Main device API. ioctl's to read/set/clear bits, as well as to
  * set alarms to wait for using a subsequent select().
  */
-unsigned long inline setget_input(struct gpio_private *priv, unsigned long arg)
+inline unsigned long setget_input(struct gpio_private *priv, unsigned long arg)
 {
 	/* Set direction 0=unchanged 1=input,
 	 * return mask with 1=input */
@@ -450,7 +450,7 @@ unsigned long inline setget_input(struct gpio_private *priv, unsigned long arg)
 	return dir_g_in_bits;
 } /* setget_input */
 
-unsigned long inline setget_output(struct gpio_private *priv, unsigned long arg)
+inline unsigned long setget_output(struct gpio_private *priv, unsigned long arg)
 {
 	if (USE_PORTS(priv)) {
 		*priv->dir = *priv->dir_shadow |=

diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index acc5781..460349c 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild

@@ -1,48 +1,31 @@
 generic-y += atomic.h
-generic-y += auxvec.h
 generic-y += barrier.h
-generic-y += bitsperlong.h
 generic-y += clkdev.h
 generic-y += cmpxchg.h
 generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
-generic-y += errno.h
+generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += extable.h
-generic-y += emergency-restart.h
-generic-y += fcntl.h
 generic-y += futex.h
 generic-y += hardirq.h
-generic-y += ioctl.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
 generic-y += module.h
-generic-y += msgbuf.h
 generic-y += percpu.h
-generic-y += poll.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += sections.h
-generic-y += sembuf.h
-generic-y += shmbuf.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += statfs.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h

diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
index b55fc2a..3687b54 100644
--- a/arch/cris/include/uapi/asm/Kbuild
+++ b/arch/cris/include/uapi/asm/Kbuild

@@ -1,4 +1,21 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
 generic-y += siginfo.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += statfs.h
+generic-y += types.h

diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index cce3bc36..2cf7648 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild

@@ -1,7 +1,9 @@
 
 generic-y += clkdev.h
+generic-y += device.h
 generic-y += exec.h
 generic-y += extable.h
+generic-y += fb.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h

diff --git a/arch/frv/include/asm/cmpxchg.h b/arch/frv/include/asm/cmpxchg.h
index a899765..ad1f11c 100644
--- a/arch/frv/include/asm/cmpxchg.h
+++ b/arch/frv/include/asm/cmpxchg.h

@@ -76,6 +76,7 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v);
  * - if (*ptr != test) then orig = *ptr;
  */
 extern uint64_t __cmpxchg_64(uint64_t test, uint64_t new, volatile uint64_t *v);
+#define cmpxchg64(p, o, n)	__cmpxchg_64((o), (n), (p))
 
 #ifndef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS
 

diff --git a/arch/frv/include/asm/device.h b/arch/frv/include/asm/device.h
deleted file mode 100644
index d8f9872..0000000
--- a/arch/frv/include/asm/device.h
+++ /dev/null

@@ -1,7 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#include <asm-generic/device.h>
-

diff --git a/arch/frv/include/asm/fb.h b/arch/frv/include/asm/fb.h
deleted file mode 100644
index c7df380..0000000
--- a/arch/frv/include/asm/fb.h
+++ /dev/null

@@ -1,12 +0,0 @@
-#ifndef _ASM_FB_H_
-#define _ASM_FB_H_
-#include <linux/fb.h>
-
-#define fb_pgprotect(...) do {} while (0)
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
-	return 0;
-}
-
-#endif /* _ASM_FB_H_ */

diff --git a/arch/frv/include/asm/tlbflush.h b/arch/frv/include/asm/tlbflush.h
index 7ac5eaf..7587942 100644
--- a/arch/frv/include/asm/tlbflush.h
+++ b/arch/frv/include/asm/tlbflush.h

@@ -18,10 +18,10 @@
 #ifdef CONFIG_MMU
 
 #ifndef __ASSEMBLY__
-extern void asmlinkage __flush_tlb_all(void);
-extern void asmlinkage __flush_tlb_mm(unsigned long contextid);
-extern void asmlinkage __flush_tlb_page(unsigned long contextid, unsigned long start);
-extern void asmlinkage __flush_tlb_range(unsigned long contextid,
+extern asmlinkage void __flush_tlb_all(void);
+extern asmlinkage void __flush_tlb_mm(unsigned long contextid);
+extern asmlinkage void __flush_tlb_page(unsigned long contextid, unsigned long start);
+extern asmlinkage void __flush_tlb_range(unsigned long contextid,
 					 unsigned long start, unsigned long end);
 #endif /* !__ASSEMBLY__ */
 

diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 99c8246..bc07749 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild

@@ -1,5 +1,4 @@
 generic-y += asm-offsets.h
-generic-y += auxvec.h
 generic-y += barrier.h
 generic-y += bugs.h
 generic-y += cacheflush.h
@@ -11,66 +10,41 @@
 generic-y += div64.h
 generic-y += dma.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += ftrace.h
 generic-y += futex.h
 generic-y += hardirq.h
 generic-y += hash.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
 generic-y += mmu.h
 generic-y += mmu_context.h
 generic-y += module.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += parport.h
 generic-y += percpu.h
 generic-y += pgalloc.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += scatterlist.h
 generic-y += sections.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += setup.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
 generic-y += sizes.h
-generic-y += socket.h
-generic-y += sockios.h
 generic-y += spinlock.h
-generic-y += stat.h
-generic-y += statfs.h
-generic-y += swab.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += timex.h
 generic-y += tlbflush.h
-generic-y += trace_clock.h
 generic-y += topology.h
-generic-y += types.h
-generic-y += ucontext.h
+generic-y += trace_clock.h
 generic-y += unaligned.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h

diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
index b55fc2a..187aed8 100644
--- a/arch/h8300/include/uapi/asm/Kbuild
+++ b/arch/h8300/include/uapi/asm/Kbuild

@@ -1,4 +1,30 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += setup.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
 generic-y += siginfo.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += swab.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h

diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 0fc9cb0..3401368 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild

@@ -1,4 +1,3 @@
-generic-y += auxvec.h
 generic-y += barrier.h
 generic-y += bug.h
 generic-y += bugs.h
@@ -7,53 +6,32 @@
 generic-y += device.h
 generic-y += div64.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += ftrace.h
 generic-y += hardirq.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
 generic-y += iomap.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
-generic-y += msgbuf.h
 generic-y += pci.h
 generic-y += percpu.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += rwsem.h
 generic-y += sections.h
 generic-y += segment.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
 generic-y += sizes.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += stat.h
-generic-y += statfs.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
-generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h

diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild
index b55fc2a..cb5df3a 100644
--- a/arch/hexagon/include/uapi/asm/Kbuild
+++ b/arch/hexagon/include/uapi/asm/Kbuild

@@ -1,4 +1,26 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
 generic-y += siginfo.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h

diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 502a91d..1d7641f 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild

@@ -1,8 +1,6 @@
-
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += irq_work.h
-generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h

diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index 599507b..c14815d 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c

@@ -163,8 +163,3 @@ void arch_crash_save_vmcoreinfo(void)
 #endif
 }
 
-phys_addr_t paddr_vmcoreinfo_note(void)
-{
-	return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
-}
-

diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 79c7c46..555b111 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c

@@ -334,7 +334,7 @@ static void ia64_mlogbuf_dump_from_init(void)
 	ia64_mlogbuf_dump();
 }
 
-static void inline
+static inline void
 ia64_mca_spin(const char *func)
 {
 	if (monarch_cpu == smp_processor_id())

diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
index 5bc34ea..b67bb4c 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c

@@ -140,7 +140,7 @@ static inline u64 __iomem *pcibr_ate_addr(struct pcibus_info *pcibus_info,
 /*
  * Update the ate.
  */
-void inline
+inline void
 ate_write(struct pcibus_info *pcibus_info, int ate_index, int count,
 	  volatile u64 ate)
 {

diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
index 46d3df4..3bd9abc 100644
--- a/arch/ia64/sn/pci/tioce_provider.c
+++ b/arch/ia64/sn/pci/tioce_provider.c

@@ -52,7 +52,7 @@
  * All registers defined in struct tioce will meet that criteria.
  */
 
-static void inline
+static inline void
 tioce_mmr_war_pre(struct tioce_kernel *kern, void __iomem *mmr_addr)
 {
 	u64 mmr_base;
@@ -78,7 +78,7 @@ tioce_mmr_war_pre(struct tioce_kernel *kern, void __iomem *mmr_addr)
 	}
 }
 
-static void inline
+static inline void
 tioce_mmr_war_post(struct tioce_kernel *kern, void __iomem *mmr_addr)
 {
 	u64 mmr_base;

diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index c000ffa..7e11b12 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild

@@ -1,10 +1,9 @@
-
 generic-y += clkdev.h
 generic-y += current.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += irq_work.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += module.h
@@ -12,4 +11,3 @@
 generic-y += sections.h
 generic-y += trace_clock.h
 generic-y += word-at-a-time.h
-generic-y += kprobes.h

diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild
index c94ee54..1c44d3b 100644
--- a/arch/m32r/include/uapi/asm/Kbuild
+++ b/arch/m32r/include/uapi/asm/Kbuild

@@ -1,4 +1,5 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-generic-y	+= siginfo.h
+generic-y += kvm_para.h
+generic-y += siginfo.h

diff --git a/arch/m68k/coldfire/intc-simr.c b/arch/m68k/coldfire/intc-simr.c
index 7cf2c15..15c4b7a 100644
--- a/arch/m68k/coldfire/intc-simr.c
+++ b/arch/m68k/coldfire/intc-simr.c

@@ -35,7 +35,7 @@
 #define	EINT7	67	/* EDGE Port interrupt 7 */
 
 static unsigned int irqebitmap[] = { 0, 1, 4, 7 };
-static unsigned int inline irq2ebit(unsigned int irq)
+static inline unsigned int irq2ebit(unsigned int irq)
 {
 	return irqebitmap[irq - EINT0];
 }
@@ -51,7 +51,7 @@ static unsigned int inline irq2ebit(unsigned int irq)
 #define	EINT1	65	/* EDGE Port interrupt 1 */
 #define	EINT7	71	/* EDGE Port interrupt 7 */
 
-static unsigned int inline irq2ebit(unsigned int irq)
+static inline unsigned int irq2ebit(unsigned int irq)
 {
 	return irq - EINT0;
 }

diff --git a/arch/m68k/configs/m5208evb_defconfig b/arch/m68k/configs/m5208evb_defconfig
index 4c7b793..a3102ff 100644
--- a/arch/m68k/configs/m5208evb_defconfig
+++ b/arch/m68k/configs/m5208evb_defconfig

@@ -26,7 +26,6 @@
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set

diff --git a/arch/m68k/configs/m5249evb_defconfig b/arch/m68k/configs/m5249evb_defconfig
index a782f36..f7bb9ed 100644
--- a/arch/m68k/configs/m5249evb_defconfig
+++ b/arch/m68k/configs/m5249evb_defconfig

@@ -27,7 +27,6 @@
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set

diff --git a/arch/m68k/configs/m5272c3_defconfig b/arch/m68k/configs/m5272c3_defconfig
index 6f5fb92..1e679f6 100644
--- a/arch/m68k/configs/m5272c3_defconfig
+++ b/arch/m68k/configs/m5272c3_defconfig

@@ -27,7 +27,6 @@
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set

diff --git a/arch/m68k/configs/m5275evb_defconfig b/arch/m68k/configs/m5275evb_defconfig
index b5d7cd1..d2987b4 100644
--- a/arch/m68k/configs/m5275evb_defconfig
+++ b/arch/m68k/configs/m5275evb_defconfig

@@ -27,7 +27,6 @@
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set

diff --git a/arch/m68k/configs/m5307c3_defconfig b/arch/m68k/configs/m5307c3_defconfig
index 1b4c094..97a78c9 100644
--- a/arch/m68k/configs/m5307c3_defconfig
+++ b/arch/m68k/configs/m5307c3_defconfig

@@ -27,7 +27,6 @@
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set

diff --git a/arch/m68k/configs/m5407c3_defconfig b/arch/m68k/configs/m5407c3_defconfig
index 275ad543..766a97f 100644
--- a/arch/m68k/configs/m5407c3_defconfig
+++ b/arch/m68k/configs/m5407c3_defconfig

@@ -28,7 +28,6 @@
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set

diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 5ecf4e4..59d6d0d 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild

@@ -1,36 +1,25 @@
 generic-y += barrier.h
-generic-y += bitsperlong.h
 generic-y += clkdev.h
 generic-y += device.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += futex.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
 generic-y += percpu.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += sections.h
-generic-y += shmparam.h
 generic-y += spinlock.h
-generic-y += statfs.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h

diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
index 68b45cc..3717b64 100644
--- a/arch/m68k/include/uapi/asm/Kbuild
+++ b/arch/m68k/include/uapi/asm/Kbuild

@@ -2,11 +2,21 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += ioctl.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
 generic-y += msgbuf.h
+generic-y += resource.h
 generic-y += sembuf.h
 generic-y += shmbuf.h
+generic-y += shmparam.h
 generic-y += siginfo.h
 generic-y += socket.h
 generic-y += sockios.h
+generic-y += statfs.h
 generic-y += termbits.h
 generic-y += termios.h
+generic-y += types.h

diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild
index 8f940553..3fba97e 100644
--- a/arch/metag/include/asm/Kbuild
+++ b/arch/metag/include/asm/Kbuild

@@ -1,58 +1,34 @@
-generic-y += auxvec.h
-generic-y += bitsperlong.h
 generic-y += bugs.h
 generic-y += clkdev.h
 generic-y += current.h
 generic-y += device.h
 generic-y += dma.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += futex.h
 generic-y += hardirq.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += pci.h
 generic-y += percpu.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
 generic-y += sections.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
-generic-y += signal.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += stat.h
-generic-y += statfs.h
 generic-y += switch_to.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += timex.h
 generic-y += trace_clock.h
-generic-y += types.h
-generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h

diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild
index b29731e..6ac763d 100644
--- a/arch/metag/include/uapi/asm/Kbuild
+++ b/arch/metag/include/uapi/asm/Kbuild

@@ -1,6 +1,30 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
 generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += posix_types.h
 generic-y += resource.h
+generic-y += sembuf.h
 generic-y += setup.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += signal.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h

diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 83a4ef3..9d66f77 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild

@@ -1,22 +1,15 @@
-
 generic-y += barrier.h
 generic-y += bitops.h
-generic-y += bitsperlong.h
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += clkdev.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += hardirq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
@@ -27,31 +20,13 @@
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += parport.h
 generic-y += percpu.h
-generic-y += poll.h
 generic-y += preempt.h
-generic-y += resource.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
-generic-y += siginfo.h
-generic-y += signal.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += stat.h
-generic-y += statfs.h
-generic-y += swab.h
 generic-y += syscalls.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += ucontext.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h

diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index cb6784f..e77a596 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild

@@ -1,5 +1,28 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-generic-y += types.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
 generic-y += siginfo.h
+generic-y += signal.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += swab.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h

diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index a1bdb1e..39b9f31 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h

@@ -116,7 +116,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	pud_t *pud;
 
-	pud = (pud_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, PUD_ORDER);
+	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
 	if (pud)
 		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
 	return pud;

diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index 98a117a..bab3d41 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h

@@ -47,7 +47,7 @@ extern int __cpu_logical_map[NR_CPUS];
 /* Mask of CPUs which are currently definitely operating coherently */
 extern cpumask_t cpu_coherent_mask;
 
-extern void asmlinkage smp_bootstrap(void);
+extern asmlinkage void smp_bootstrap(void);
 
 extern void calculate_cpu_foreign_map(void);
 

diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 94627a3..50c020c 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c

@@ -317,7 +317,8 @@ const struct exception_table_entry *search_module_dbetables(unsigned long addr)
 
 	spin_lock_irqsave(&dbe_lock, flags);
 	list_for_each_entry(dbe, &dbe_list, dbe_list) {
-		e = search_extable(dbe->dbe_start, dbe->dbe_end - 1, addr);
+		e = search_extable(dbe->dbe_start,
+				   dbe->dbe_end - dbe->dbe_start, addr);
 		if (e)
 			break;
 	}

diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 38dfa27..b68b4d0 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c

@@ -429,7 +429,8 @@ static const struct exception_table_entry *search_dbe_tables(unsigned long addr)
 {
 	const struct exception_table_entry *e;
 
-	e = search_extable(__start___dbe_table, __stop___dbe_table - 1, addr);
+	e = search_extable(__start___dbe_table,
+			   __stop___dbe_table - __start___dbe_table, addr);
 	if (!e)
 		e = search_module_dbetables(addr);
 	return e;

diff --git a/arch/mn10300/include/asm/nmi.h b/arch/mn10300/include/asm/nmi.h
index f3671cb..b056275 100644
--- a/arch/mn10300/include/asm/nmi.h
+++ b/arch/mn10300/include/asm/nmi.h

@@ -11,4 +11,6 @@
 #ifndef _ASM_NMI_H
 #define _ASM_NMI_H
 
+extern void arch_touch_nmi_watchdog(void);
+
 #endif /* _ASM_NMI_H */

diff --git a/arch/mn10300/kernel/mn10300-watchdog-low.S b/arch/mn10300/kernel/mn10300-watchdog-low.S
index f2f5c9c..34f8773 100644
--- a/arch/mn10300/kernel/mn10300-watchdog-low.S
+++ b/arch/mn10300/kernel/mn10300-watchdog-low.S

@@ -50,9 +50,9 @@
 #   we can't inline it)
 #
 ###############################################################################
-	.globl	touch_nmi_watchdog
-	.type	touch_nmi_watchdog,@function
-touch_nmi_watchdog:
+	.globl	arch_touch_nmi_watchdog
+	.type	arch_touch_nmi_watchdog,@function
+arch_touch_nmi_watchdog:
 	clr	d0
 	clr	d1
 	mov	watchdog_alert_counter, a0
@@ -63,4 +63,4 @@
 	lne
 	ret	[],0
 
-	.size	touch_nmi_watchdog,.-touch_nmi_watchdog
+	.size	arch_touch_nmi_watchdog,.-arch_touch_nmi_watchdog

diff --git a/arch/mn10300/kernel/mn10300-watchdog.c b/arch/mn10300/kernel/mn10300-watchdog.c
index a2d8e69..0d5641b 100644
--- a/arch/mn10300/kernel/mn10300-watchdog.c
+++ b/arch/mn10300/kernel/mn10300-watchdog.c

@@ -31,7 +31,7 @@ static unsigned int watchdog;
 static unsigned int watchdog_hz = 1;
 unsigned int watchdog_alert_counter[NR_CPUS];
 
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
 /*
  * the best way to detect whether a CPU has a 'hard lockup' problem

diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index e1a843d..896c26a 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild

@@ -1,8 +1,6 @@
 generic-y += atomic.h
-generic-y += auxvec.h
 generic-y += barrier.h
 generic-y += bitops.h
-generic-y += bitsperlong.h
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += clkdev.h
@@ -12,55 +10,33 @@
 generic-y += div64.h
 generic-y += dma.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += ftrace.h
 generic-y += futex.h
 generic-y += hardirq.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
 generic-y += module.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += pci.h
 generic-y += percpu.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += sections.h
 generic-y += segment.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += shmbuf.h
-generic-y += signal.h
-generic-y += socket.h
-generic-y += sockios.h
 generic-y += spinlock.h
-generic-y += stat.h
-generic-y += statfs.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h

diff --git a/arch/nios2/include/asm/signal.h b/arch/nios2/include/asm/signal.h
deleted file mode 100644
index bbcf11e..0000000
--- a/arch/nios2/include/asm/signal.h
+++ /dev/null

@@ -1,22 +0,0 @@
-/*
- * Copyright Altera Corporation (C) 2013. All rights reserved
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- */
-#ifndef _NIOS2_SIGNAL_H
-#define _NIOS2_SIGNAL_H
-
-#include <uapi/asm/signal.h>
-
-#endif	/* _NIOS2_SIGNAL_H */

diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild
index 51eff5b..ffca24d 100644
--- a/arch/nios2/include/uapi/asm/Kbuild
+++ b/arch/nios2/include/uapi/asm/Kbuild

@@ -1,6 +1,29 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += sembuf.h
 generic-y += setup.h
+generic-y += shmbuf.h
 generic-y += siginfo.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
 generic-y += ucontext.h

diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 091585a..5bea416 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild

@@ -1,6 +1,4 @@
-generic-y += auxvec.h
 generic-y += barrier.h
-generic-y += bitsperlong.h
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += cacheflush.h
@@ -11,57 +9,32 @@
 generic-y += div64.h
 generic-y += dma.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += ftrace.h
 generic-y += hardirq.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
 generic-y += module.h
-generic-y += msgbuf.h
 generic-y += pci.h
 generic-y += percpu.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += sections.h
 generic-y += segment.h
-generic-y += sembuf.h
-generic-y += setup.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
-generic-y += signal.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += stat.h
-generic-y += statfs.h
 generic-y += string.h
-generic-y += swab.h
 generic-y += switch_to.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
-generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h

diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
index b55fc2a..62286db 100644
--- a/arch/openrisc/include/uapi/asm/Kbuild
+++ b/arch/openrisc/include/uapi/asm/Kbuild

@@ -1,4 +1,31 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += setup.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
 generic-y += siginfo.h
+generic-y += signal.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += swab.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h

diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index a9909c2d..a411395 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild

@@ -1,5 +1,3 @@
-
-generic-y += auxvec.h
 generic-y += barrier.h
 generic-y += clkdev.h
 generic-y += current.h
@@ -11,14 +9,12 @@
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += param.h
 generic-y += percpu.h
-generic-y += poll.h
 generic-y += preempt.h
 generic-y += seccomp.h
 generic-y += segment.h
@@ -28,4 +24,3 @@
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h

diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild
index 3971c60..196d2a4 100644
--- a/arch/parisc/include/uapi/asm/Kbuild
+++ b/arch/parisc/include/uapi/asm/Kbuild

@@ -1,4 +1,8 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += kvm_para.h
+generic-y += param.h
+generic-y += poll.h
 generic-y += resource.h

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 7177a3f..36f858c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig

@@ -82,7 +82,7 @@
 
 config NMI_IPI
 	bool
-	depends on SMP && (DEBUGGER || KEXEC_CORE)
+	depends on SMP && (DEBUGGER || KEXEC_CORE || HARDLOCKUP_DETECTOR)
 	default y
 
 config STACKTRACE_SUPPORT
@@ -125,6 +125,7 @@
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_DMA_SET_COHERENT_MASK
 	select ARCH_HAS_ELF_RANDOMIZE
+	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_SCALED_CPUTIME		if VIRT_CPU_ACCOUNTING_NATIVE
 	select ARCH_HAS_SG_CHAIN
@@ -192,11 +193,13 @@
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_MOD_ARCH_SPECIFIC
-	select HAVE_NMI				if PERF_EVENTS
+	select HAVE_NMI				if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
+	select HAVE_HARDLOCKUP_DETECTOR_ARCH	if (PPC64 && PPC_BOOK3S)
 	select HAVE_OPROFILE
 	select HAVE_OPTPROBES			if PPC64
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI		if PPC64
+	select HAVE_HARDLOCKUP_DETECTOR_PERF	if HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_RCU_TABLE_FREE		if SMP

diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 20b1485..e2329db 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h

@@ -56,7 +56,7 @@ static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
 	return (pgd_t *)__get_free_page(pgtable_gfp_flags(mm, PGALLOC_GFP));
 #else
 	struct page *page;
-	page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_REPEAT),
+	page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_RETRY_MAYFAIL),
 				4);
 	if (!page)
 		return NULL;

diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index 09bde6e..548d9a4 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h

@@ -23,12 +23,13 @@
 #define CORE_DUMP_USE_REGSET
 #define ELF_EXEC_PAGESIZE	PAGE_SIZE
 
-/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
-   use of this is to invoke "./ld.so someprog" to test out a new version of
-   the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk.  */
-
-#define ELF_ET_DYN_BASE	0x20000000
+/*
+ * This is the base location for PIE (ET_DYN with INTERP) loads. On
+ * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * space open for things that want to use the area for 32-bit pointers.
+ */
+#define ELF_ET_DYN_BASE		(is_32bit_task() ? 0x000400000UL : \
+						   0x100000000UL)
 
 #define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0)
 

diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index ff1ccb3..6f8e79c 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h

@@ -1,4 +1,15 @@
 #ifndef _ASM_NMI_H
 #define _ASM_NMI_H
 
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+extern void arch_touch_nmi_watchdog(void);
+
+extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
+					   bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+
+#else
+static inline void arch_touch_nmi_watchdog(void) {}
+#endif
+
 #endif /* _ASM_NMI_H */

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index ebddb21..8ea9850 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h

@@ -55,6 +55,8 @@ struct smp_ops_t {
 	int   (*cpu_bootable)(unsigned int nr);
 };
 
+extern void smp_flush_nmi_ipi(u64 delay_us);
+extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
 extern void smp_send_debugger_break(void);
 extern void start_secondary_resume(void);
 extern void smp_generic_give_timebase(void);

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 0845eeb..4aa7c14 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile

@@ -38,6 +38,7 @@
 				   signal_64.o ptrace32.o \
 				   paca.o nvram_64.o firmware.o
 obj-$(CONFIG_VDSO32)		+= vdso32/
+obj-$(CONFIG_HARDLOCKUP_DETECTOR)	+= watchdog.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 4c18a5f..e6d8354 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S

@@ -1314,6 +1314,31 @@
 EXC_VIRT_NONE(0x5800, 0x100)
 #endif
 
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) && defined(CONFIG_HAVE_HARDLOCKUP_DETECTOR_ARCH)
+
+#define MASKED_DEC_HANDLER_LABEL 3f
+
+#define MASKED_DEC_HANDLER(_H)				\
+3: /* soft-nmi */					\
+	std	r12,PACA_EXGEN+EX_R12(r13);		\
+	GET_SCRATCH0(r10);				\
+	std	r10,PACA_EXGEN+EX_R13(r13);		\
+	EXCEPTION_PROLOG_PSERIES_1(soft_nmi_common, _H)
+
+/*
+ * Common entry for the soft-NMI path reached through MASKED_DEC_HANDLER.
+ * The interrupted r1 is kept in r10, r1 is switched to a stack taken from
+ * the PACA (r13) and lowered by INT_FRAME_SIZE, and soft_nmi_interrupt is
+ * named as the handler before branching to ret_from_except.
+ */
+EXC_COMMON_BEGIN(soft_nmi_common)
+	mr	r10,r1
+	/*
+	 * NOTE(review): r1 is loaded twice back to back, so the PACAEMERGSP
+	 * value is overwritten before it is used and PACA_NMI_EMERG_SP is the
+	 * stack that actually takes effect. Confirm which stack is intended
+	 * and drop the other load.
+	 */
+	ld	r1,PACAEMERGSP(r13)
+	ld	r1,PACA_NMI_EMERG_SP(r13)
+	subi	r1,r1,INT_FRAME_SIZE
+	EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900,
+			system_reset, soft_nmi_interrupt,
+			ADD_NVGPRS;ADD_RECONCILE)
+	b	ret_from_except
+
+#else
+#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */
+#define MASKED_DEC_HANDLER(_H)
+#endif
 
 /*
  * An interrupt came in while soft-disabled. We set paca->irq_happened, then:
@@ -1336,7 +1361,7 @@
 	lis	r10,0x7fff;				\
 	ori	r10,r10,0xffff;				\
 	mtspr	SPRN_DEC,r10;				\
-	b	2f;					\
+	b	MASKED_DEC_HANDLER_LABEL;		\
 1:	cmpwi	r10,PACA_IRQ_DBELL;			\
 	beq	2f;					\
 	cmpwi	r10,PACA_IRQ_HMI;			\
@@ -1351,7 +1376,8 @@
 	ld	r11,PACA_EXGEN+EX_R11(r13);		\
 	GET_SCRATCH0(r13);				\
 	##_H##rfid;					\
-	b	.
+	b	.;					\
+	MASKED_DEC_HANDLER(_H)
 
 /*
  * Real mode exceptions actually use this too, but alternate

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 3079518..dc0c49c 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c

@@ -999,8 +999,7 @@ static int fadump_create_elfcore_headers(char *bufp)
 
 	phdr->p_paddr	= fadump_relocate(paddr_vmcoreinfo_note());
 	phdr->p_offset	= phdr->p_paddr;
-	phdr->p_memsz	= vmcoreinfo_max_size;
-	phdr->p_filesz	= vmcoreinfo_max_size;
+	phdr->p_memsz	= phdr->p_filesz = VMCOREINFO_NOTE_SIZE;
 
 	/* Increment number of program headers. */
 	(elf->e_phnum)++;

diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 9ad37f8..1086ea3 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c

@@ -25,6 +25,7 @@
 #include <linux/kvm_para.h>
 #include <linux/slab.h>
 #include <linux/of.h>
+#include <linux/nmi.h> /* hardlockup_detector_disable() */
 
 #include <asm/reg.h>
 #include <asm/sections.h>
@@ -718,6 +719,12 @@ static __init void kvm_free_tmp(void)
 
 static int __init kvm_guest_init(void)
 {
+	/*
+	 * The hardlockup detector is likely to get false positives in
+	 * KVM guests, so disable it by default.
+	 */
+	hardlockup_detector_disable();
+
 	if (!kvm_para_available())
 		goto free_tmp;
 

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index dd8a04f..613f79f 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c

@@ -15,6 +15,9 @@
 
 #undef DEBUG_PROM
 
+/* we cannot use FORTIFY as it brings in new symbols */
+#define __NO_FORTIFY
+
 #include <stdarg.h>
 #include <linux/kernel.h>
 #include <linux/string.h>

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 4640f6d..af23d4b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c

@@ -751,22 +751,3 @@ unsigned long memory_block_size_bytes(void)
 struct ppc_pci_io ppc_pci_io;
 EXPORT_SYMBOL(ppc_pci_io);
 #endif
-
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-u64 hw_nmi_get_sample_period(int watchdog_thresh)
-{
-	return ppc_proc_freq * watchdog_thresh;
-}
-
-/*
- * The hardlockup detector breaks PMU event based branches and is likely
- * to get false positives in KVM guests, so disable it by default.
- */
-static int __init disable_hardlockup_detector(void)
-{
-	hardlockup_detector_disable();
-
-	return 0;
-}
-early_initcall(disable_hardlockup_detector);
-#endif

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index c6b8bac..997c88d 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c

@@ -435,13 +435,31 @@ static void do_smp_send_nmi_ipi(int cpu)
 	}
 }
 
+/*
+ * Poll until nmi_ipi_busy_count reads zero. The counter is sampled under
+ * the NMI IPI lock; between samples the lock is dropped and udelay(1) is
+ * called.
+ *
+ * - delay_us > 0 bounds the wait to roughly that many polls, after which
+ *   the function gives up and returns; == 0 polls indefinitely.
+ */
+void smp_flush_nmi_ipi(u64 delay_us)
+{
+	unsigned long irq_flags;
+
+	for (;;) {
+		nmi_ipi_lock_start(&irq_flags);
+		if (!nmi_ipi_busy_count)
+			break;
+		nmi_ipi_unlock_end(&irq_flags);
+
+		udelay(1);
+
+		/* Only a non-zero budget is counted down. */
+		if (delay_us && --delay_us == 0)
+			return;
+	}
+	nmi_ipi_unlock_end(&irq_flags);
+}
+
 /*
  * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
  * - fn is the target callback function.
  * - delay_us > 0 is the delay before giving up waiting for targets to
  *   enter the handler, == 0 specifies indefinite delay.
  */
-static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
+int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
 {
 	unsigned long flags;
 	int me = raw_smp_processor_id();

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
new file mode 100644
index 0000000..b67f8b0
--- /dev/null
+++ b/arch/powerpc/kernel/watchdog.c

@@ -0,0 +1,386 @@
+/*
+ * Watchdog support on powerpc systems.
+ *
+ * Copyright 2017, IBM Corporation.
+ *
+ * This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c
+ */
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/kdebug.h>
+#include <linux/sched/debug.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+
+#include <asm/paca.h>
+
+/*
+ * The watchdog has a simple timer that runs on each CPU, once per timer
+ * period. This is the heartbeat.
+ *
+ * Then there are checks to see if the heartbeat has not triggered on a CPU
+ * for the panic timeout period. Currently the watchdog only supports an
+ * SMP check, so the heartbeat only turns on when we have 2 or more CPUs.
+ *
+ * This is not an NMI watchdog, but Linux uses that name for a generic
+ * watchdog in some cases, so NMI gets used in some places.
+ */
+
+static cpumask_t wd_cpus_enabled __read_mostly;
+
+static u64 wd_panic_timeout_tb __read_mostly; /* timebase ticks until panic */
+static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */
+
+static u64 wd_timer_period_ms __read_mostly;  /* interval between heartbeat */
+
+static DEFINE_PER_CPU(struct timer_list, wd_timer);
+static DEFINE_PER_CPU(u64, wd_timer_tb);
+
+/*
+ * These are for the SMP checker. CPUs clear their pending bit in their
+ * heartbeat. If the bitmask becomes empty, the time is noted and the
+ * bitmask is refilled.
+ *
+ * All CPUs clear their bit in the pending mask every timer period.
+ * Once all have cleared, the time is noted and the bits are reset.
+ * If the time since all clear was greater than the panic timeout,
+ * we can panic with the list of stuck CPUs.
+ *
+ * This will work best with NMI IPIs for crash code so the stuck CPUs
+ * can be pulled out to get their backtraces.
+ */
+static unsigned long __wd_smp_lock;
+static cpumask_t wd_smp_cpus_pending;
+static cpumask_t wd_smp_cpus_stuck;
+static u64 wd_smp_last_reset_tb;
+
+/*
+ * Take the SMP watchdog lock: disable local interrupts (saving the previous
+ * state in *flags), then spin on bit 0 of __wd_smp_lock until it is ours.
+ */
+static inline void wd_smp_lock(unsigned long *flags)
+{
+	/*
+	 * This is an open-coded bit lock on purpose, to stay clear of the
+	 * regular locking layers: the lock may end up being taken from low
+	 * level interrupt handlers at some point in future.
+	 */
+	local_irq_save(*flags);
+	for (;;) {
+		if (likely(!test_and_set_bit_lock(0, &__wd_smp_lock)))
+			break;
+		cpu_relax();
+	}
+}
+
+/*
+ * Release the SMP watchdog lock and restore the interrupt state that the
+ * matching wd_smp_lock() stored in *flags.
+ */
+static inline void wd_smp_unlock(unsigned long *flags)
+{
+	clear_bit_unlock(0, &__wd_smp_lock);
+	local_irq_restore(*flags);
+}
+
+/*
+ * NMI IPI callback sent by watchdog_smp_panic() to a CPU it considers
+ * stuck. Reports the lockup for the CPU it runs on, dumps either the
+ * supplied register state or the current stack, and panics if
+ * hardlockup_panic is set.
+ */
+static void wd_lockup_ipi(struct pt_regs *regs)
+{
+	int this_cpu = raw_smp_processor_id();
+
+	pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", this_cpu);
+	print_modules();
+	print_irqtrace_events(current);
+
+	/* Without a register snapshot, fall back to a plain stack dump. */
+	if (!regs)
+		dump_stack();
+	else
+		show_regs(regs);
+
+	if (!hardlockup_panic)
+		return;
+
+	nmi_panic(regs, "Hard LOCKUP");
+}
+
+/*
+ * Move @cpu from the pending mask to the stuck mask. If that leaves the
+ * pending mask empty, record @tb as the last reset time and refill the
+ * pending mask with every enabled CPU that is not marked stuck.
+ *
+ * The callers in this file invoke it with wd_smp_lock held.
+ */
+static void set_cpu_stuck(int cpu, u64 tb)
+{
+	cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
+	cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+
+	/* Other CPUs still have to check in: nothing to reset yet. */
+	if (!cpumask_empty(&wd_smp_cpus_pending))
+		return;
+
+	wd_smp_last_reset_tb = tb;
+	cpumask_andnot(&wd_smp_cpus_pending,
+			&wd_cpus_enabled,
+			&wd_smp_cpus_stuck);
+}
+
+/*
+ * SMP lockup check, run from a CPU's heartbeat once the pending mask has
+ * gone wd_smp_panic_timeout_tb timebase ticks without being reset.
+ *
+ * - cpu is the CPU running the check.
+ * - tb is the timebase value sampled by the caller.
+ *
+ * The trigger conditions are re-evaluated under wd_smp_lock. Only if they
+ * still hold are the CPUs left in wd_smp_cpus_pending reported, sent an
+ * NMI IPI and moved to wd_smp_cpus_stuck, followed by the optional all-CPU
+ * backtrace and panic. If the re-check fails, the lock is dropped and the
+ * function returns without reporting anything.
+ */
+static void watchdog_smp_panic(int cpu, u64 tb)
+{
+	unsigned long flags;
+	int c;
+
+	wd_smp_lock(&flags);
+	/* Double check some things under lock */
+	if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb)
+		goto out;
+	if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending))
+		goto out;
+	if (cpumask_weight(&wd_smp_cpus_pending) == 0)
+		goto out;
+
+	pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n",
+			cpu, cpumask_pr_args(&wd_smp_cpus_pending));
+
+	/*
+	 * Try to trigger the stuck CPUs.
+	 */
+	for_each_cpu(c, &wd_smp_cpus_pending) {
+		if (c == cpu)
+			continue;
+		smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+	}
+	smp_flush_nmi_ipi(1000000);
+
+	/*
+	 * Take the stuck CPUs out of the watch group in one step. Removing
+	 * them one at a time while iterating over wd_smp_cpus_pending would
+	 * refill that mask as soon as the last stuck CPU is cleared, and the
+	 * iteration could then carry on into the refilled mask and mark
+	 * healthy CPUs as stuck.
+	 */
+	cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck,
+			&wd_smp_cpus_pending);
+	wd_smp_last_reset_tb = tb;
+	cpumask_andnot(&wd_smp_cpus_pending,
+			&wd_cpus_enabled,
+			&wd_smp_cpus_stuck);
+
+	wd_smp_unlock(&flags);
+
+	printk_safe_flush();
+	/*
+	 * printk_safe_flush() seems to require another print
+	 * before anything actually goes out to console.
+	 */
+	if (sysctl_hardlockup_all_cpu_backtrace)
+		trigger_allbutself_cpu_backtrace();
+
+	if (hardlockup_panic)
+		nmi_panic(NULL, "Hard LOCKUP");
+
+	return;
+
+out:
+	/* The re-check failed: no lockup, so no backtrace and no panic. */
+	wd_smp_unlock(&flags);
+}
+
+/*
+ * Heartbeat bookkeeping for @cpu.
+ *
+ * If @cpu is pending, its bit is cleared; when that empties the pending
+ * mask, the emptiness is re-checked under wd_smp_lock and, if confirmed,
+ * @tb becomes the last reset time and the mask is refilled with the
+ * enabled CPUs that are not stuck.
+ *
+ * If @cpu is not pending but is marked stuck, it is reported as unstuck
+ * and removed from the stuck mask.
+ */
+static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
+{
+	unsigned long flags;
+
+	if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
+		cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+		if (!cpumask_empty(&wd_smp_cpus_pending))
+			return;
+
+		/* Looks like the last one in: confirm under the lock. */
+		wd_smp_lock(&flags);
+		if (cpumask_empty(&wd_smp_cpus_pending)) {
+			wd_smp_last_reset_tb = tb;
+			cpumask_andnot(&wd_smp_cpus_pending,
+					&wd_cpus_enabled,
+					&wd_smp_cpus_stuck);
+		}
+		wd_smp_unlock(&flags);
+		return;
+	}
+
+	if (likely(!cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)))
+		return;
+
+	pr_emerg("Watchdog CPU:%d became unstuck\n", cpu);
+	wd_smp_lock(&flags);
+	cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
+	wd_smp_unlock(&flags);
+}
+
+/*
+ * One heartbeat for @cpu: stamp its per-CPU timebase, clear it from the
+ * SMP pending mask, and run the SMP lockup check if the time since the
+ * last pending-mask reset has reached wd_smp_panic_timeout_tb.
+ */
+static void watchdog_timer_interrupt(int cpu)
+{
+	u64 now = get_tb();
+	s64 since_reset;
+
+	per_cpu(wd_timer_tb, cpu) = now;
+	wd_smp_clear_cpu_pending(cpu, now);
+
+	/* Sampled after the clear above, which may have moved the reset time. */
+	since_reset = (s64)(now - wd_smp_last_reset_tb);
+	if (since_reset >= (s64)wd_smp_panic_timeout_tb)
+		watchdog_smp_panic(cpu, now);
+}
+
+/*
+ * Per-CPU hard lockup check, named as the handler by soft_nmi_common in
+ * exceptions-64s.S.
+ *
+ * Does nothing unless this CPU is in wd_cpus_enabled. If the CPU's last
+ * recorded heartbeat (wd_timer_tb) is at least wd_panic_timeout_tb ticks
+ * old, the CPU is marked stuck and the lockup is reported; a CPU already
+ * marked stuck is not reported again. @regs is dumped when non-NULL,
+ * otherwise the current stack is.
+ */
+void soft_nmi_interrupt(struct pt_regs *regs)
+{
+	unsigned long flags;
+	int cpu = raw_smp_processor_id();
+	u64 tb;
+
+	if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
+		return;
+
+	nmi_enter();
+	tb = get_tb();
+	if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
+		/* Restart the measurement window from this point. */
+		per_cpu(wd_timer_tb, cpu) = tb;
+
+		wd_smp_lock(&flags);
+		if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
+			/* Already reported; note this path skips the DEC write below. */
+			wd_smp_unlock(&flags);
+			goto out;
+		}
+		set_cpu_stuck(cpu, tb);
+
+		pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", cpu);
+		print_modules();
+		print_irqtrace_events(current);
+		if (regs)
+			show_regs(regs);
+		else
+			dump_stack();
+
+		wd_smp_unlock(&flags);
+
+		if (sysctl_hardlockup_all_cpu_backtrace)
+			trigger_allbutself_cpu_backtrace();
+
+		if (hardlockup_panic)
+			nmi_panic(regs, "Hard LOCKUP");
+	}
+	/* Reprogram the decrementer only when the timeout is below 0x7fffffff. */
+	if (wd_panic_timeout_tb < 0x7fffffff)
+		mtspr(SPRN_DEC, wd_panic_timeout_tb);
+
+out:
+	nmi_exit();
+}
+
+/*
+ * Arm timer @t on @cpu to fire one heartbeat period (wd_timer_period_ms)
+ * from now. Periods longer than one second have their expiry rounded up
+ * with __round_jiffies_up().
+ */
+static void wd_timer_reset(unsigned int cpu, struct timer_list *t)
+{
+	unsigned long when = jiffies + msecs_to_jiffies(wd_timer_period_ms);
+
+	if (wd_timer_period_ms > 1000)
+		when = __round_jiffies_up(when, cpu);
+
+	t->expires = when;
+	add_timer_on(t, cpu);
+}
+
+/*
+ * Heartbeat timer callback: run the heartbeat for the CPU the timer fired
+ * on, then re-arm that CPU's timer. @data is unused.
+ */
+static void wd_timer_fn(unsigned long data)
+{
+	int this_cpu = smp_processor_id();
+
+	watchdog_timer_interrupt(this_cpu);
+	wd_timer_reset(this_cpu, this_cpu_ptr(&wd_timer));
+}
+
+/*
+ * Arch hook for touching the watchdog: performs an immediate heartbeat on
+ * the calling CPU, exactly as the periodic timer would.
+ */
+void arch_touch_nmi_watchdog(void)
+{
+	watchdog_timer_interrupt(smp_processor_id());
+}
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
+
+/*
+ * Begin heartbeats on @cpu: stamp its per-CPU timebase with the current
+ * time, set up its timer as a pinned timer running wd_timer_fn, and arm it.
+ */
+static void start_watchdog_timer_on(unsigned int cpu)
+{
+	struct timer_list *timer = per_cpu_ptr(&wd_timer, cpu);
+
+	per_cpu(wd_timer_tb, cpu) = get_tb();
+
+	setup_pinned_timer(timer, wd_timer_fn, 0);
+	wd_timer_reset(cpu, timer);
+}
+
+/* Stop heartbeats on @cpu by synchronously deleting its per-CPU timer. */
+static void stop_watchdog_timer_on(unsigned int cpu)
+{
+	del_timer_sync(per_cpu_ptr(&wd_timer, cpu));
+}
+
+/*
+ * Enable the watchdog on @cpu; registered as the hotplug startup callback
+ * in powerpc_watchdog_init() and also called on reconfiguration.
+ *
+ * Nothing is started when the NMI watchdog is not enabled, the watchdog is
+ * suspended, or @cpu is not in watchdog_cpumask. Always returns 0.
+ */
+static int start_wd_on_cpu(unsigned int cpu)
+{
+	/* Being asked to start an already-enabled CPU is unexpected. */
+	if (WARN_ON(cpumask_test_cpu(cpu, &wd_cpus_enabled)))
+		return 0;
+
+	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED) ||
+	    watchdog_suspended ||
+	    !cpumask_test_cpu(cpu, &watchdog_cpumask))
+		return 0;
+
+	cpumask_set_cpu(cpu, &wd_cpus_enabled);
+	if (cpumask_weight(&wd_cpus_enabled) == 1) {
+		/* First enabled CPU: seed the pending mask and reset time. */
+		cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
+		wd_smp_last_reset_tb = get_tb();
+	}
+	smp_wmb();
+	start_watchdog_timer_on(cpu);
+
+	return 0;
+}
+
+/*
+ * Disable the watchdog on @cpu; registered as the hotplug teardown
+ * callback in powerpc_watchdog_init() and also called on reconfiguration.
+ * Always returns 0.
+ */
+static int stop_wd_on_cpu(unsigned int cpu)
+{
+	/* A CPU that was never enabled can show up in the CPU unplug case. */
+	if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
+		stop_watchdog_timer_on(cpu);
+
+		cpumask_clear_cpu(cpu, &wd_cpus_enabled);
+		wd_smp_clear_cpu_pending(cpu, get_tb());
+	}
+
+	return 0;
+}
+
+/*
+ * Derive the watchdog timeouts from watchdog_thresh:
+ * - wd_panic_timeout_tb: watchdog_thresh scaled by ppc_tb_freq;
+ * - wd_smp_panic_timeout_tb: 3/2 of that, so the SMP detector triggers a
+ *   bit later;
+ * - wd_timer_period_ms: 2/5 of the threshold expressed in milliseconds,
+ *   the same factor the perf based detector uses.
+ */
+static void watchdog_calc_timeouts(void)
+{
+	u64 panic_tb = watchdog_thresh * ppc_tb_freq;
+
+	wd_panic_timeout_tb = panic_tb;
+	wd_smp_panic_timeout_tb = panic_tb * 3 / 2;
+	wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
+}
+
+/*
+ * Apply a changed watchdog configuration: recompute the timeouts, stop the
+ * watchdog on every CPU it is currently enabled on, then start it again on
+ * each online CPU that is in watchdog_cpumask.
+ */
+void watchdog_nmi_reconfigure(void)
+{
+	int c;
+
+	watchdog_calc_timeouts();
+
+	for_each_cpu(c, &wd_cpus_enabled) {
+		stop_wd_on_cpu(c);
+	}
+
+	for_each_cpu_and(c, cpu_online_mask, &watchdog_cpumask) {
+		start_wd_on_cpu(c);
+	}
+}
+
+/*
+ * This runs after lockup_detector_init() which sets up watchdog_cpumask.
+ *
+ * Computes the initial timeouts and registers start_wd_on_cpu() and
+ * stop_wd_on_cpu() as CPU hotplug callbacks. A registration failure is
+ * only logged; the initcall returns 0 either way.
+ */
+static int __init powerpc_watchdog_init(void)
+{
+	int err;
+
+	watchdog_calc_timeouts();
+
+	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online",
+				start_wd_on_cpu, stop_wd_on_cpu);
+	if (err < 0)
+		/* Terminate the line so the next message starts a new one. */
+		pr_warn("Watchdog could not be initialized\n");
+
+	return 0;
+}
+arch_initcall(powerpc_watchdog_init);
+
+/* NMI IPI callback for backtraces: hands @regs to nmi_cpu_backtrace(). */
+static void handle_backtrace_ipi(struct pt_regs *regs)
+{
+	nmi_cpu_backtrace(regs);
+}
+
+/*
+ * Request a backtrace from every CPU in @mask. Remote CPUs are sent an NMI
+ * IPI running handle_backtrace_ipi(); the calling CPU runs the handler
+ * directly with no register state.
+ */
+static void raise_backtrace_ipi(cpumask_t *mask)
+{
+	unsigned int target;
+
+	for_each_cpu(target, mask) {
+		if (target != smp_processor_id())
+			smp_send_nmi_ipi(target, handle_backtrace_ipi, 1000000);
+		else
+			handle_backtrace_ipi(NULL);
+	}
+}
+
+/*
+ * Arch hook for cpumask backtraces: delegates to the generic
+ * nmi_trigger_cpumask_backtrace() with raise_backtrace_ipi() as the raise
+ * callback.
+ */
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+{
+	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
+}

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 710e491..8cb0190 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c

@@ -93,7 +93,7 @@ int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
 	}
 
 	if (!hpt)
-		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT
+		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_RETRY_MAYFAIL
 				       |__GFP_NOWARN, order - PAGE_SHIFT);
 
 	if (!hpt)

diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index f391770..41cf5ae 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c

@@ -233,192 +233,192 @@ static long calc_offset(struct fixup_entry *entry, unsigned int *p)
 
 static void test_basic_patching(void)
 {
-	extern unsigned int ftr_fixup_test1;
-	extern unsigned int end_ftr_fixup_test1;
-	extern unsigned int ftr_fixup_test1_orig;
-	extern unsigned int ftr_fixup_test1_expected;
-	int size = &end_ftr_fixup_test1 - &ftr_fixup_test1;
+	extern unsigned int ftr_fixup_test1[];
+	extern unsigned int end_ftr_fixup_test1[];
+	extern unsigned int ftr_fixup_test1_orig[];
+	extern unsigned int ftr_fixup_test1_expected[];
+	int size = end_ftr_fixup_test1 - ftr_fixup_test1;
 
 	fixup.value = fixup.mask = 8;
-	fixup.start_off = calc_offset(&fixup, &ftr_fixup_test1 + 1);
-	fixup.end_off = calc_offset(&fixup, &ftr_fixup_test1 + 2);
+	fixup.start_off = calc_offset(&fixup, ftr_fixup_test1 + 1);
+	fixup.end_off = calc_offset(&fixup, ftr_fixup_test1 + 2);
 	fixup.alt_start_off = fixup.alt_end_off = 0;
 
 	/* Sanity check */
-	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+	check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
 
 	/* Check we don't patch if the value matches */
 	patch_feature_section(8, &fixup);
-	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+	check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
 
 	/* Check we do patch if the value doesn't match */
 	patch_feature_section(0, &fixup);
-	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);
+	check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
 
 	/* Check we do patch if the mask doesn't match */
-	memcpy(&ftr_fixup_test1, &ftr_fixup_test1_orig, size);
-	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+	memcpy(ftr_fixup_test1, ftr_fixup_test1_orig, size);
+	check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
 	patch_feature_section(~8, &fixup);
-	check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);
+	check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
 }
 
 static void test_alternative_patching(void)
 {
-	extern unsigned int ftr_fixup_test2;
-	extern unsigned int end_ftr_fixup_test2;
-	extern unsigned int ftr_fixup_test2_orig;
-	extern unsigned int ftr_fixup_test2_alt;
-	extern unsigned int ftr_fixup_test2_expected;
-	int size = &end_ftr_fixup_test2 - &ftr_fixup_test2;
+	extern unsigned int ftr_fixup_test2[];
+	extern unsigned int end_ftr_fixup_test2[];
+	extern unsigned int ftr_fixup_test2_orig[];
+	extern unsigned int ftr_fixup_test2_alt[];
+	extern unsigned int ftr_fixup_test2_expected[];
+	int size = end_ftr_fixup_test2 - ftr_fixup_test2;
 
 	fixup.value = fixup.mask = 0xF;
-	fixup.start_off = calc_offset(&fixup, &ftr_fixup_test2 + 1);
-	fixup.end_off = calc_offset(&fixup, &ftr_fixup_test2 + 2);
-	fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test2_alt);
-	fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test2_alt + 1);
+	fixup.start_off = calc_offset(&fixup, ftr_fixup_test2 + 1);
+	fixup.end_off = calc_offset(&fixup, ftr_fixup_test2 + 2);
+	fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test2_alt);
+	fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test2_alt + 1);
 
 	/* Sanity check */
-	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+	check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
 
 	/* Check we don't patch if the value matches */
 	patch_feature_section(0xF, &fixup);
-	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+	check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
 
 	/* Check we do patch if the value doesn't match */
 	patch_feature_section(0, &fixup);
-	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);
+	check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
 
 	/* Check we do patch if the mask doesn't match */
-	memcpy(&ftr_fixup_test2, &ftr_fixup_test2_orig, size);
-	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+	memcpy(ftr_fixup_test2, ftr_fixup_test2_orig, size);
+	check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
 	patch_feature_section(~0xF, &fixup);
-	check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);
+	check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
 }
 
 static void test_alternative_case_too_big(void)
 {
-	extern unsigned int ftr_fixup_test3;
-	extern unsigned int end_ftr_fixup_test3;
-	extern unsigned int ftr_fixup_test3_orig;
-	extern unsigned int ftr_fixup_test3_alt;
-	int size = &end_ftr_fixup_test3 - &ftr_fixup_test3;
+	extern unsigned int ftr_fixup_test3[];
+	extern unsigned int end_ftr_fixup_test3[];
+	extern unsigned int ftr_fixup_test3_orig[];
+	extern unsigned int ftr_fixup_test3_alt[];
+	int size = end_ftr_fixup_test3 - ftr_fixup_test3;
 
 	fixup.value = fixup.mask = 0xC;
-	fixup.start_off = calc_offset(&fixup, &ftr_fixup_test3 + 1);
-	fixup.end_off = calc_offset(&fixup, &ftr_fixup_test3 + 2);
-	fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test3_alt);
-	fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test3_alt + 2);
+	fixup.start_off = calc_offset(&fixup, ftr_fixup_test3 + 1);
+	fixup.end_off = calc_offset(&fixup, ftr_fixup_test3 + 2);
+	fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test3_alt);
+	fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test3_alt + 2);
 
 	/* Sanity check */
-	check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+	check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
 
 	/* Expect nothing to be patched, and the error returned to us */
 	check(patch_feature_section(0xF, &fixup) == 1);
-	check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+	check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
 	check(patch_feature_section(0, &fixup) == 1);
-	check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+	check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
 	check(patch_feature_section(~0xF, &fixup) == 1);
-	check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+	check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
 }
 
 static void test_alternative_case_too_small(void)
 {
-	extern unsigned int ftr_fixup_test4;
-	extern unsigned int end_ftr_fixup_test4;
-	extern unsigned int ftr_fixup_test4_orig;
-	extern unsigned int ftr_fixup_test4_alt;
-	extern unsigned int ftr_fixup_test4_expected;
-	int size = &end_ftr_fixup_test4 - &ftr_fixup_test4;
+	extern unsigned int ftr_fixup_test4[];
+	extern unsigned int end_ftr_fixup_test4[];
+	extern unsigned int ftr_fixup_test4_orig[];
+	extern unsigned int ftr_fixup_test4_alt[];
+	extern unsigned int ftr_fixup_test4_expected[];
+	int size = end_ftr_fixup_test4 - ftr_fixup_test4;
 	unsigned long flag;
 
 	/* Check a high-bit flag */
 	flag = 1UL << ((sizeof(unsigned long) - 1) * 8);
 	fixup.value = fixup.mask = flag;
-	fixup.start_off = calc_offset(&fixup, &ftr_fixup_test4 + 1);
-	fixup.end_off = calc_offset(&fixup, &ftr_fixup_test4 + 5);
-	fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test4_alt);
-	fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test4_alt + 2);
+	fixup.start_off = calc_offset(&fixup, ftr_fixup_test4 + 1);
+	fixup.end_off = calc_offset(&fixup, ftr_fixup_test4 + 5);
+	fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test4_alt);
+	fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test4_alt + 2);
 
 	/* Sanity check */
-	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+	check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
 
 	/* Check we don't patch if the value matches */
 	patch_feature_section(flag, &fixup);
-	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+	check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
 
 	/* Check we do patch if the value doesn't match */
 	patch_feature_section(0, &fixup);
-	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);
+	check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0);
 
 	/* Check we do patch if the mask doesn't match */
-	memcpy(&ftr_fixup_test4, &ftr_fixup_test4_orig, size);
-	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+	memcpy(ftr_fixup_test4, ftr_fixup_test4_orig, size);
+	check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
 	patch_feature_section(~flag, &fixup);
-	check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);
+	check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0);
 }
 
 static void test_alternative_case_with_branch(void)
 {
-	extern unsigned int ftr_fixup_test5;
-	extern unsigned int end_ftr_fixup_test5;
-	extern unsigned int ftr_fixup_test5_expected;
-	int size = &end_ftr_fixup_test5 - &ftr_fixup_test5;
+	extern unsigned int ftr_fixup_test5[];
+	extern unsigned int end_ftr_fixup_test5[];
+	extern unsigned int ftr_fixup_test5_expected[];
+	int size = end_ftr_fixup_test5 - ftr_fixup_test5;
 
-	check(memcmp(&ftr_fixup_test5, &ftr_fixup_test5_expected, size) == 0);
+	check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0);
 }
 
 static void test_alternative_case_with_external_branch(void)
 {
-	extern unsigned int ftr_fixup_test6;
-	extern unsigned int end_ftr_fixup_test6;
-	extern unsigned int ftr_fixup_test6_expected;
-	int size = &end_ftr_fixup_test6 - &ftr_fixup_test6;
+	extern unsigned int ftr_fixup_test6[];
+	extern unsigned int end_ftr_fixup_test6[];
+	extern unsigned int ftr_fixup_test6_expected[];
+	int size = end_ftr_fixup_test6 - ftr_fixup_test6;
 
-	check(memcmp(&ftr_fixup_test6, &ftr_fixup_test6_expected, size) == 0);
+	check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0);
 }
 
+/*
+ * Compares the code at ftr_fixup_test_FTR_macros with the copy that starts
+ * at the _expected label. Both symbols are u8 arrays, so the distance between
+ * them is already a byte count suitable for memcmp().
+ */
 static void test_cpu_macros(void)
 {
-	extern u8 ftr_fixup_test_FTR_macros;
-	extern u8 ftr_fixup_test_FTR_macros_expected;
-	unsigned long size = &ftr_fixup_test_FTR_macros_expected -
-			     &ftr_fixup_test_FTR_macros;
+	extern u8 ftr_fixup_test_FTR_macros[];
+	extern u8 ftr_fixup_test_FTR_macros_expected[];
+	unsigned long size = ftr_fixup_test_FTR_macros_expected -
+			     ftr_fixup_test_FTR_macros;
 
 	/* The fixups have already been done for us during boot */
-	check(memcmp(&ftr_fixup_test_FTR_macros,
-		     &ftr_fixup_test_FTR_macros_expected, size) == 0);
+	check(memcmp(ftr_fixup_test_FTR_macros,
+		     ftr_fixup_test_FTR_macros_expected, size) == 0);
 }
 
+/*
+ * Compares the code at ftr_fixup_test_FW_FTR_macros with the copy that
+ * starts at the _expected label. The body is only built with CONFIG_PPC64;
+ * otherwise the function is empty.
+ */
 static void test_fw_macros(void)
 {
 #ifdef CONFIG_PPC64
-	extern u8 ftr_fixup_test_FW_FTR_macros;
-	extern u8 ftr_fixup_test_FW_FTR_macros_expected;
-	unsigned long size = &ftr_fixup_test_FW_FTR_macros_expected -
-			     &ftr_fixup_test_FW_FTR_macros;
+	extern u8 ftr_fixup_test_FW_FTR_macros[];
+	extern u8 ftr_fixup_test_FW_FTR_macros_expected[];
+	unsigned long size = ftr_fixup_test_FW_FTR_macros_expected -
+			     ftr_fixup_test_FW_FTR_macros;
 
 	/* The fixups have already been done for us during boot */
-	check(memcmp(&ftr_fixup_test_FW_FTR_macros,
-		     &ftr_fixup_test_FW_FTR_macros_expected, size) == 0);
+	check(memcmp(ftr_fixup_test_FW_FTR_macros,
+		     ftr_fixup_test_FW_FTR_macros_expected, size) == 0);
 #endif
 }
 
+/*
+ * Compares the code at lwsync_fixup_test with the _expected_LWSYNC copy
+ * when CPU_FTR_LWSYNC is set in cur_cpu_spec->cpu_features, and with the
+ * _expected_SYNC copy otherwise.
+ */
 static void test_lwsync_macros(void)
 {
-	extern u8 lwsync_fixup_test;
-	extern u8 end_lwsync_fixup_test;
-	extern u8 lwsync_fixup_test_expected_LWSYNC;
-	extern u8 lwsync_fixup_test_expected_SYNC;
-	unsigned long size = &end_lwsync_fixup_test -
-			     &lwsync_fixup_test;
+	extern u8 lwsync_fixup_test[];
+	extern u8 end_lwsync_fixup_test[];
+	extern u8 lwsync_fixup_test_expected_LWSYNC[];
+	extern u8 lwsync_fixup_test_expected_SYNC[];
+	unsigned long size = end_lwsync_fixup_test -
+			     lwsync_fixup_test;
 
 	/* The fixups have already been done for us during boot */
 	if (cur_cpu_spec->cpu_features & CPU_FTR_LWSYNC) {
-		check(memcmp(&lwsync_fixup_test,
-			     &lwsync_fixup_test_expected_LWSYNC, size) == 0);
+		check(memcmp(lwsync_fixup_test,
+			     lwsync_fixup_test_expected_LWSYNC, size) == 0);
 	} else {
-		check(memcmp(&lwsync_fixup_test,
-			     &lwsync_fixup_test_expected_SYNC, size) == 0);
+		check(memcmp(lwsync_fixup_test,
+			     lwsync_fixup_test_expected_SYNC, size) == 0);
 	}
 }
 

diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 0ee6be4..5d78b19 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c

@@ -34,16 +34,9 @@
 /*
  * Top of mmap area (just below the process stack).
  *
- * Leave at least a ~128 MB hole on 32bit applications.
- *
- * On 64bit applications we randomise the stack by 1GB so we need to
- * space our mmap start address by a further 1GB, otherwise there is a
- * chance the mmap area will end up closer to the stack than our ulimit
- * requires.
+ * Leave at least a ~128 MB hole.
  */
-#define MIN_GAP32 (128*1024*1024)
-#define MIN_GAP64 ((128 + 1024)*1024*1024UL)
-#define MIN_GAP ((is_32bit_task()) ? MIN_GAP32 : MIN_GAP64)
+#define MIN_GAP (128*1024*1024)
 #define MAX_GAP (TASK_SIZE/6*5)
 
 static inline int mmap_is_legacy(void)
@@ -71,9 +64,26 @@ unsigned long arch_mmap_rnd(void)
 	return rnd << PAGE_SHIFT;
 }
 
+static inline unsigned long stack_maxrandom_size(void)
+{
+	/*
+	 * Stack randomisation size to allow for in the current task:
+	 * 8MB for 32bit, 1GB for 64bit, nothing when PF_RANDOMIZE is clear.
+	 */
+	if (current->flags & PF_RANDOMIZE)
+		return is_32bit_task() ? (1UL << 23) : (1UL << 30);
+
+	return 0;
+}
+
 static inline unsigned long mmap_base(unsigned long rnd)
 {
 	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
+
+	/* Values close to RLIM_INFINITY can overflow. */
+	if (gap + pad > gap)
+		gap += pad;
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;

diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index ec024c0..c92ed01 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h

@@ -193,14 +193,13 @@ struct arch_elf_state {
 #define CORE_DUMP_USE_REGSET
 #define ELF_EXEC_PAGESIZE	4096
 
-/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
-   use of this is to invoke "./ld.so someprog" to test out a new version of
-   the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk. 64-bit
-   tasks are aligned to 4GB. */
-#define ELF_ET_DYN_BASE (is_compat_task() ? \
-				(STACK_TOP / 3 * 2) : \
-				(STACK_TOP / 3 * 2) & ~((1UL << 32) - 1))
+/*
+ * This is the base location for PIE (ET_DYN with INTERP) loads. On
+ * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * space open for things that want to use the area for 32-bit pointers.
+ */
+#define ELF_ET_DYN_BASE		(is_compat_task() ? 0x000400000UL : \
+						    0x100000000UL)
 
 /* This yields a mask that user programs can use to figure out what
    instruction set this CPU supports. */

diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 49a6bd4..3d0b14a 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c

@@ -246,6 +246,7 @@ void arch_crash_save_vmcoreinfo(void)
 	VMCOREINFO_SYMBOL(lowcore_ptr);
 	VMCOREINFO_SYMBOL(high_memory);
 	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
+	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
 }
 
 void machine_shutdown(void)

diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 3ae756c..3d1d808 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c

@@ -496,11 +496,6 @@ static void __init setup_memory_end(void)
 	pr_notice("The maximum memory size is %luMB\n", memory_end >> 20);
 }
 
-static void __init setup_vmcoreinfo(void)
-{
-	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
-}
-
 #ifdef CONFIG_CRASH_DUMP
 
 /*
@@ -939,7 +934,6 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	setup_resources();
-	setup_vmcoreinfo();
 	setup_lowcore();
 	smp_fill_possible_mask();
 	cpu_detect_mhz_feature();

diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 590c91a..1a6f9c3 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild

@@ -1,39 +1,20 @@
-
-generic-y += bitsperlong.h
 generic-y += clkdev.h
 generic-y += current.h
 generic-y += delay.h
 generic-y += div64.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
-generic-y += fcntl.h
-generic-y += ioctl.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
-generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += parport.h
 generic-y += percpu.h
-generic-y += poll.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += rwsem.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += shmbuf.h
 generic-y += sizes.h
-generic-y += socket.h
-generic-y += statfs.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += trace_clock.h
-generic-y += ucontext.h
 generic-y += xor.h

diff --git a/arch/sh/include/asm/bug.h b/arch/sh/include/asm/bug.h
index 1b77f068..c9828f7 100644
--- a/arch/sh/include/asm/bug.h
+++ b/arch/sh/include/asm/bug.h

@@ -48,6 +48,7 @@ do {							\
 		   "i" (__FILE__),			\
 		   "i" (__LINE__), "i" (0),		\
 		   "i" (sizeof(struct bug_entry)));	\
+	unreachable();					\
 } while (0)
 
 #define __WARN_FLAGS(flags)				\

diff --git a/arch/sh/include/asm/stackprotector.h b/arch/sh/include/asm/stackprotector.h
index d9df3a7..141515a 100644
--- a/arch/sh/include/asm/stackprotector.h
+++ b/arch/sh/include/asm/stackprotector.h

@@ -19,6 +19,7 @@ static __always_inline void boot_init_stack_canary(void)
 	/* Try to get a semi random initial value. */
 	get_random_bytes(&canary, sizeof(canary));
 	canary ^= LINUX_VERSION_CODE;
+	canary &= CANARY_MASK;
 
 	current->stack_canary = canary;
 	__stack_chk_guard = current->stack_canary;

diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild
index b55fc2a..e285313 100644
--- a/arch/sh/include/uapi/asm/Kbuild
+++ b/arch/sh/include/uapi/asm/Kbuild

@@ -1,4 +1,22 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ipcbuf.h
+generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
 generic-y += siginfo.h
+generic-y += socket.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += ucontext.h

diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c
index d94dade..445b5e6 100644
--- a/arch/sh/mm/cache-sh5.c
+++ b/arch/sh/mm/cache-sh5.c

@@ -234,7 +234,7 @@ static void sh64_icache_inv_current_user_range(unsigned long start, unsigned lon
 #define DUMMY_ALLOCO_AREA_SIZE ((L1_CACHE_BYTES << 10) + (1024 * 4))
 static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };
 
-static void inline sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
+static inline void sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
 {
 	/* Purge all ways in a particular block of sets, specified by the base
 	   set number and number of sets.  Can handle wrap-around, if that's

diff --git a/arch/sh/mm/extable_64.c b/arch/sh/mm/extable_64.c
index b90cdfa..7a3b4d3 100644
--- a/arch/sh/mm/extable_64.c
+++ b/arch/sh/mm/extable_64.c

@@ -10,6 +10,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
+#include <linux/bsearch.h>
 #include <linux/rwsem.h>
 #include <linux/extable.h>
 #include <linux/uaccess.h>
@@ -40,10 +41,23 @@ static const struct exception_table_entry *check_exception_ranges(unsigned long
 	return NULL;
 }
 
+/* bsearch() comparator: orders the address at @key against @elt's insn. */
+static int cmp_ex_search(const void *key, const void *elt)
+{
+	const struct exception_table_entry *entry = elt;
+	unsigned long addr = *(const unsigned long *)key;
+
+	if (addr == entry->insn)
+		return 0;
+
+	/* compare instead of subtracting, to avoid overflow */
+	return addr > entry->insn ? 1 : -1;
+}
+
 /* Simple binary search */
 const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-		 const struct exception_table_entry *last,
+search_extable(const struct exception_table_entry *base,
+		 const size_t num,
 		 unsigned long value)
 {
 	const struct exception_table_entry *mid;
@@ -52,20 +66,8 @@ search_extable(const struct exception_table_entry *first,
 	if (mid)
 		return mid;
 
-        while (first <= last) {
-		long diff;
-
-		mid = (last - first) / 2 + first;
-		diff = mid->insn - value;
-                if (diff == 0)
-                        return mid;
-                else if (diff < 0)
-                        first = mid+1;
-                else
-                        last = mid-1;
-        }
-
-        return NULL;
+	return bsearch(&value, base, num,
+		       sizeof(struct exception_table_entry), cmp_ex_search);
 }
 
 int fixup_exception(struct pt_regs *regs)

diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index e9e837b..80ddc01 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild

@@ -18,5 +18,4 @@
 generic-y += rwsem.h
 generic-y += serial.h
 generic-y += trace_clock.h
-generic-y += types.h
 generic-y += word-at-a-time.h

diff --git a/arch/sparc/include/asm/asm-prototypes.h b/arch/sparc/include/asm/asm-prototypes.h
new file mode 100644
index 0000000..d381e11
--- /dev/null
+++ b/arch/sparc/include/asm/asm-prototypes.h

@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <asm/xor.h>
+#include <asm/checksum.h>
+#include <asm/trap_block.h>
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+#include <asm/ftrace.h>
+#include <asm/cacheflush.h>
+#include <asm/oplib.h>
+#include <linux/atomic.h>
+
+void *__memscan_zero(void *, size_t);
+void *__memscan_generic(void *, int, size_t);
+void *__bzero(void *, size_t);
+void VISenter(void); /* Dummy prototype to suppress warning */
+#undef memcpy
+#undef memset
+void *memcpy(void *dest, const void *src, size_t n);
+void *memset(void *s, int c, size_t n);
+typedef int TItype __attribute__((mode(TI)));
+TItype __multi3(TItype a, TItype b);

diff --git a/arch/sparc/include/asm/nmi.h b/arch/sparc/include/asm/nmi.h
index 26ad2b2..284eac3 100644
--- a/arch/sparc/include/asm/nmi.h
+++ b/arch/sparc/include/asm/nmi.h

@@ -7,6 +7,7 @@ void nmi_adjust_hz(unsigned int new_hz);
 
 extern atomic_t nmi_active;
 
+void arch_touch_nmi_watchdog(void);
 void start_nmi_watchdog(void *unused);
 void stop_nmi_watchdog(void *unused);
 

diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild
index b15bf6b..2178c78 100644
--- a/arch/sparc/include/uapi/asm/Kbuild
+++ b/arch/sparc/include/uapi/asm/Kbuild

@@ -1,2 +1,4 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += types.h

diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index e4b4e79..fa466ce 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c

@@ -205,7 +205,7 @@ static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size)
 	handle_size = (sizeof(struct mdesc_handle) -
 		       sizeof(struct mdesc_hdr) +
 		       mdesc_size);
-	base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_REPEAT);
+	base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!base)
 		return NULL;
 

diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index 95e73c6..048ad78 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c

@@ -51,7 +51,7 @@ static DEFINE_PER_CPU(unsigned int, last_irq_sum);
 static DEFINE_PER_CPU(long, alert_counter);
 static DEFINE_PER_CPU(int, nmi_touch);
 
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
 {
 	if (atomic_read(&nmi_active)) {
 		int cpu;
@@ -61,10 +61,8 @@ void touch_nmi_watchdog(void)
 				per_cpu(nmi_touch, cpu) = 1;
 		}
 	}
-
-	touch_softlockup_watchdog();
 }
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
 static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
 {

diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S
index 1c6a1bd..ce17c30 100644
--- a/arch/sparc/lib/atomic_64.S
+++ b/arch/sparc/lib/atomic_64.S

@@ -62,19 +62,23 @@
 ENDPROC(atomic_fetch_##op);						\
 EXPORT_SYMBOL(atomic_fetch_##op);
 
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
+ATOMIC_OP(add)
+ATOMIC_OP_RETURN(add)
+ATOMIC_FETCH_OP(add)
 
-ATOMIC_OPS(add)
-ATOMIC_OPS(sub)
+ATOMIC_OP(sub)
+ATOMIC_OP_RETURN(sub)
+ATOMIC_FETCH_OP(sub)
 
-#undef ATOMIC_OPS
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+ATOMIC_OP(and)
+ATOMIC_FETCH_OP(and)
 
-ATOMIC_OPS(and)
-ATOMIC_OPS(or)
-ATOMIC_OPS(xor)
+ATOMIC_OP(or)
+ATOMIC_FETCH_OP(or)
 
-#undef ATOMIC_OPS
+ATOMIC_OP(xor)
+ATOMIC_FETCH_OP(xor)
+
 #undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -124,19 +128,23 @@
 ENDPROC(atomic64_fetch_##op);						\
 EXPORT_SYMBOL(atomic64_fetch_##op);
 
-#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op)
+ATOMIC64_OP(add)
+ATOMIC64_OP_RETURN(add)
+ATOMIC64_FETCH_OP(add)
 
-ATOMIC64_OPS(add)
-ATOMIC64_OPS(sub)
+ATOMIC64_OP(sub)
+ATOMIC64_OP_RETURN(sub)
+ATOMIC64_FETCH_OP(sub)
 
-#undef ATOMIC64_OPS
-#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_FETCH_OP(op)
+ATOMIC64_OP(and)
+ATOMIC64_FETCH_OP(and)
 
-ATOMIC64_OPS(and)
-ATOMIC64_OPS(or)
-ATOMIC64_OPS(xor)
+ATOMIC64_OP(or)
+ATOMIC64_FETCH_OP(or)
 
-#undef ATOMIC64_OPS
+ATOMIC64_OP(xor)
+ATOMIC64_FETCH_OP(xor)
+
 #undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP

diff --git a/arch/sparc/lib/checksum_64.S b/arch/sparc/lib/checksum_64.S
index f673217..6cfa521 100644
--- a/arch/sparc/lib/checksum_64.S
+++ b/arch/sparc/lib/checksum_64.S

@@ -38,6 +38,7 @@
 
 	.align		32
 	.globl		csum_partial
+	.type		csum_partial,#function
 	EXPORT_SYMBOL(csum_partial)
 csum_partial:		/* %o0=buff, %o1=len, %o2=sum */
 	prefetch	[%o0 + 0x000], #n_reads

diff --git a/arch/sparc/lib/csum_copy.S b/arch/sparc/lib/csum_copy.S
index 0ecbafc..b1051e7 100644
--- a/arch/sparc/lib/csum_copy.S
+++ b/arch/sparc/lib/csum_copy.S

@@ -65,6 +65,7 @@
 	 add		%o5, %o4, %o4
 
 	.globl		FUNC_NAME
+	.type		FUNC_NAME,#function
 	EXPORT_SYMBOL(FUNC_NAME)
 FUNC_NAME:		/* %o0=src, %o1=dst, %o2=len, %o3=sum */
 	LOAD(prefetch, %o0 + 0x000, #n_reads)

diff --git a/arch/sparc/lib/memscan_64.S b/arch/sparc/lib/memscan_64.S
index daa96f4..5efee1f 100644
--- a/arch/sparc/lib/memscan_64.S
+++ b/arch/sparc/lib/memscan_64.S

@@ -14,6 +14,8 @@
 	.text
 	.align	32
 	.globl		__memscan_zero, __memscan_generic
+	.type		__memscan_zero,#function
+	.type		__memscan_generic,#function
 	.globl		memscan
 	EXPORT_SYMBOL(__memscan_zero)
 	EXPORT_SYMBOL(__memscan_generic)

diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S
index bb539b4..e23338d 100644
--- a/arch/sparc/lib/memset.S
+++ b/arch/sparc/lib/memset.S

@@ -63,6 +63,7 @@
 __bzero_begin:
 
 	.globl	__bzero
+	.type	__bzero,#function
 	.globl	memset
 	EXPORT_SYMBOL(__bzero)
 	EXPORT_SYMBOL(memset)

diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c
index db214e9..2422511 100644
--- a/arch/sparc/mm/extable.c
+++ b/arch/sparc/mm/extable.c

@@ -13,11 +13,11 @@ void sort_extable(struct exception_table_entry *start,
 
 /* Caller knows they are in a range if ret->fixup == 0 */
 const struct exception_table_entry *
-search_extable(const struct exception_table_entry *start,
-	       const struct exception_table_entry *last,
+search_extable(const struct exception_table_entry *base,
+	       const size_t num,
 	       unsigned long value)
 {
-	const struct exception_table_entry *walk;
+	int i;
 
 	/* Single insn entries are encoded as:
 	 *	word 1:	insn address
@@ -37,30 +37,30 @@ search_extable(const struct exception_table_entry *start,
 	 */
 
 	/* 1. Try to find an exact match. */
-	for (walk = start; walk <= last; walk++) {
-		if (walk->fixup == 0) {
+	for (i = 0; i < num; i++) {
+		if (base[i].fixup == 0) {
 			/* A range entry, skip both parts. */
-			walk++;
+			i++;
 			continue;
 		}
 
 		/* A deleted entry; see trim_init_extable */
-		if (walk->fixup == -1)
+		if (base[i].fixup == -1)
 			continue;
 
-		if (walk->insn == value)
-			return walk;
+		if (base[i].insn == value)
+			return &base[i];
 	}
 
 	/* 2. Try to find a range match. */
-	for (walk = start; walk <= (last - 1); walk++) {
-		if (walk->fixup)
+	for (i = 0; i < (num - 1); i++) {
+		if (base[i].fixup)
 			continue;
 
-		if (walk[0].insn <= value && walk[1].insn > value)
-			return walk;
+		if (base[i].insn <= value && base[i + 1].insn > value)
+			return &base[i];
 
-		walk++;
+		i++;
 	}
 
         return NULL;

diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index cd0e32b..f80cfc6 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c

@@ -78,8 +78,8 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 		return 0;
 
 	refs = 0;
-	head = pmd_page(pmd);
-	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+	page = pmd_page(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+	head = compound_head(page);
 	do {
 		VM_BUG_ON(compound_head(page) != head);
 		pages[*nr] = page;

diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 16f0b08..d28d2b8 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild

@@ -2,37 +2,18 @@
 generic-y += bugs.h
 generic-y += clkdev.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += parport.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += seccomp.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += statfs.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += trace_clock.h
-generic-y += types.h
 generic-y += xor.h

diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild
index 0c74c3c..5711de0 100644
--- a/arch/tile/include/uapi/asm/Kbuild
+++ b/arch/tile/include/uapi/asm/Kbuild

@@ -1,4 +1,23 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += statfs.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
 generic-y += ucontext.h

diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 7a53a55..fda7e21 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild

@@ -1,66 +1,38 @@
-
 generic-y += atomic.h
-generic-y += auxvec.h
-generic-y += bitsperlong.h
 generic-y += bugs.h
 generic-y += clkdev.h
 generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
 generic-y += fb.h
-generic-y += fcntl.h
 generic-y += ftrace.h
 generic-y += futex.h
 generic-y += hardirq.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
+generic-y += kprobes.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
 generic-y += module.h
-generic-y += msgbuf.h
-generic-y += param.h
 generic-y += parport.h
 generic-y += percpu.h
-generic-y += poll.h
-generic-y += posix_types.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += sections.h
 generic-y += segment.h
-generic-y += sembuf.h
 generic-y += serial.h
-generic-y += setup.h
-generic-y += shmbuf.h
-generic-y += shmparam.h
-generic-y += signal.h
 generic-y += sizes.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += stat.h
-generic-y += statfs.h
-generic-y += swab.h
 generic-y += syscalls.h
-generic-y += termbits.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += types.h
-generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h

diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
index 1c44d3b..759a714 100644
--- a/arch/unicore32/include/uapi/asm/Kbuild
+++ b/arch/unicore32/include/uapi/asm/Kbuild

@@ -1,5 +1,32 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
 generic-y += kvm_para.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += param.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += sembuf.h
+generic-y += setup.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
 generic-y += siginfo.h
+generic-y += signal.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += swab.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += types.h
+generic-y += ucontext.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 94a1868..781521b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig

@@ -50,6 +50,7 @@
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
+	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_MMIO_FLUSH
@@ -162,6 +163,7 @@
 	select HAVE_PCSPKR_PLATFORM
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
+	select HAVE_HARDLOCKUP_DETECTOR_PERF	if HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 00241c8..a0838ab 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c

@@ -411,3 +411,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	debug_putstr("done.\nBooting the kernel.\n");
 	return output;
 }
+
+void fortify_panic(const char *name)
+{
+	error("detected buffer overflow");
+}

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index d2ff779..796ff6c 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h

@@ -33,7 +33,7 @@
 
 #ifdef CONFIG_X86_32
 
-extern unsigned long asmlinkage efi_call_phys(void *, ...);
+extern asmlinkage unsigned long efi_call_phys(void *, ...);
 
 #define arch_efi_call_virt_setup()	kernel_fpu_begin()
 #define arch_efi_call_virt_teardown()	kernel_fpu_end()
@@ -52,7 +52,7 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
 
 #define EFI_LOADER_SIGNATURE	"EL64"
 
-extern u64 asmlinkage efi_call(void *fp, ...);
+extern asmlinkage u64 efi_call(void *fp, ...);
 
 #define efi_call_phys(f, args...)		efi_call((f), args)
 

diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index e8ab9a4..1c18d83 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h

@@ -245,12 +245,13 @@ extern int force_personality32;
 #define CORE_DUMP_USE_REGSET
 #define ELF_EXEC_PAGESIZE	4096
 
-/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
-   use of this is to invoke "./ld.so someprog" to test out a new version of
-   the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk.  */
-
-#define ELF_ET_DYN_BASE		(TASK_SIZE / 3 * 2)
+/*
+ * This is the base location for PIE (ET_DYN with INTERP) loads. On
+ * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * space open for things that want to use the area for 32-bit pointers.
+ */
+#define ELF_ET_DYN_BASE		(mmap_is_ia32() ? 0x000400000UL : \
+						  0x100000000UL)
 
 /* This yields a mask that user programs can use to figure out what
    instruction set this CPU supports.  This could be done in user space,

diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index dcbd9bc..8abedf1 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h

@@ -74,6 +74,7 @@ static __always_inline void boot_init_stack_canary(void)
 	get_random_bytes(&canary, sizeof(canary));
 	tsc = rdtsc();
 	canary += tsc + (tsc << 32UL);
+	canary &= CANARY_MASK;
 
 	current->stack_canary = canary;
 #ifdef CONFIG_X86_64

diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 3d3e835..e9ee848 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h

@@ -142,7 +142,9 @@ static __always_inline void *__constant_memcpy(void *to, const void *from,
 }
 
 #define __HAVE_ARCH_MEMCPY
+extern void *memcpy(void *, const void *, size_t);
 
+#ifndef CONFIG_FORTIFY_SOURCE
 #ifdef CONFIG_X86_USE_3DNOW
 
 #include <asm/mmx.h>
@@ -195,11 +197,15 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len)
 #endif
 
 #endif
+#endif /* !CONFIG_FORTIFY_SOURCE */
 
 #define __HAVE_ARCH_MEMMOVE
 void *memmove(void *dest, const void *src, size_t n);
 
+extern int memcmp(const void *, const void *, size_t);
+#ifndef CONFIG_FORTIFY_SOURCE
 #define memcmp __builtin_memcmp
+#endif
 
 #define __HAVE_ARCH_MEMCHR
 extern void *memchr(const void *cs, int c, size_t count);
@@ -321,6 +327,8 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern,
 	 : __memset_generic((s), (c), (count)))
 
 #define __HAVE_ARCH_MEMSET
+extern void *memset(void *, int, size_t);
+#ifndef CONFIG_FORTIFY_SOURCE
 #if (__GNUC__ >= 4)
 #define memset(s, c, count) __builtin_memset(s, c, count)
 #else
@@ -330,6 +338,7 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern,
 				 (count))				\
 	 : __memset((s), (c), (count)))
 #endif
+#endif /* !CONFIG_FORTIFY_SOURCE */
 
 /*
  * find the first occurrence of byte 'c', or 1 past the area if none

diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 1f22bc2..2a8c822 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h

@@ -31,6 +31,7 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t
 extern void *memcpy(void *to, const void *from, size_t len);
 extern void *__memcpy(void *to, const void *from, size_t len);
 
+#ifndef CONFIG_FORTIFY_SOURCE
 #ifndef CONFIG_KMEMCHECK
 #if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4
 #define memcpy(dst, src, len)					\
@@ -51,6 +52,7 @@ extern void *__memcpy(void *to, const void *from, size_t len);
  */
 #define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len))
 #endif
+#endif /* !CONFIG_FORTIFY_SOURCE */
 
 #define __HAVE_ARCH_MEMSET
 void *memset(void *s, int c, size_t n);
@@ -77,6 +79,11 @@ int strcmp(const char *cs, const char *ct);
 #define memcpy(dst, src, len) __memcpy(dst, src, len)
 #define memmove(dst, src, len) __memmove(dst, src, len)
 #define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
 #endif
 
 #define __HAVE_ARCH_MEMCPY_MCSAFE 1

diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index c73c9fb..d6f3877 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c

@@ -19,7 +19,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
 u64 hw_nmi_get_sample_period(int watchdog_thresh)
 {
 	return (u64)(cpu_khz) * 1000 * watchdog_thresh;

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 22217ec..44404e2 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c

@@ -457,7 +457,7 @@ static int prepare_elf64_headers(struct crash_elf_data *ced,
 	bufp += sizeof(Elf64_Phdr);
 	phdr->p_type = PT_NOTE;
 	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
-	phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
+	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
 	(ehdr->e_phnum)++;
 
 #ifdef CONFIG_X86_64

diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c
index cad1263..2eab7d0 100644
--- a/arch/x86/lib/memcpy_32.c
+++ b/arch/x86/lib/memcpy_32.c

@@ -6,7 +6,7 @@
 
 __visible void *memcpy(void *to, const void *from, size_t n)
 {
-#ifdef CONFIG_X86_USE_3DNOW
+#if defined(CONFIG_X86_USE_3DNOW) && !defined(CONFIG_FORTIFY_SOURCE)
 	return __memcpy3d(to, from, n);
 #else
 	return __memcpy(to, from, n);

diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 88215ac..02c9d75 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c

@@ -23,12 +23,7 @@ static int __init map_range(struct range *range)
 	start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
 	end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));
 
-	/*
-	 * end + 1 here is intentional. We check several shadow bytes in advance
-	 * to slightly speed up fastpath. In some rare cases we could cross
-	 * boundary of mapped shadow, so we just map some more here.
-	 */
-	return vmemmap_populate(start, end + 1, NUMA_NO_NODE);
+	return vmemmap_populate(start, end, NUMA_NO_NODE);
 }
 
 static void __init clear_pgds(unsigned long start,

diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 797295e..229d04a 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c

@@ -92,13 +92,18 @@ unsigned long arch_mmap_rnd(void)
 static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
 {
 	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap;
 	unsigned long gap_min, gap_max;
 
+	/* Values close to RLIM_INFINITY can overflow. */
+	if (gap + pad > gap)
+		gap += pad;
+
 	/*
 	 * Top of mmap area (just below the process stack).
 	 * Leave an at least ~128 MB hole with possible stack randomization.
 	 */
-	gap_min = SIZE_128M + stack_maxrandom_size(task_size);
+	gap_min = SIZE_128M;
 	gap_max = (task_size / 6) * 5;
 
 	if (gap < gap_min)

diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 1d7a721..cab28cf 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c

@@ -2693,8 +2693,8 @@ EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 phys_addr_t paddr_vmcoreinfo_note(void)
 {
 	if (xen_pv_domain())
-		return virt_to_machine(&vmcoreinfo_note).maddr;
+		return virt_to_machine(vmcoreinfo_note).maddr;
 	else
-		return __pa_symbol(&vmcoreinfo_note);
+		return __pa(vmcoreinfo_note);
 }
 #endif /* CONFIG_KEXEC_CORE */

diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 30f62901..c04efde 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild

@@ -1,20 +1,16 @@
-generic-y += bitsperlong.h
 generic-y += bug.h
 generic-y += clkdev.h
 generic-y += div64.h
 generic-y += dma-contiguous.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += extable.h
-generic-y += fcntl.h
 generic-y += hardirq.h
-generic-y += ioctl.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
-generic-y += kvm_para.h
+generic-y += kprobes.h
 generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
@@ -22,13 +18,9 @@
 generic-y += mm-arch-hooks.h
 generic-y += percpu.h
 generic-y += preempt.h
-generic-y += resource.h
 generic-y += rwsem.h
 generic-y += sections.h
-generic-y += statfs.h
-generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h

diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild
index 4cb0d2f..a5bcdfb 100644
--- a/arch/xtensa/include/uapi/asm/Kbuild
+++ b/arch/xtensa/include/uapi/asm/Kbuild

@@ -1,3 +1,12 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += bitsperlong.h
+generic-y += errno.h
+generic-y += fcntl.h
+generic-y += ioctl.h
+generic-y += kvm_para.h
+generic-y += resource.h
 generic-y += siginfo.h
+generic-y += statfs.h
+generic-y += termios.h

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 12bbc6b..60a6835 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c

@@ -3483,11 +3483,17 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 			}
 		}
 	}
-	/* Update weight both if it must be raised and if it must be lowered */
+	/*
+	 * To improve latency (for this or other queues), immediately
+	 * update weight both if it must be raised and if it must be
+	 * lowered. Since the entity may be on some active tree here,
+	 * and might have a pending change of its ioprio class, invoke
+	 * the next function with the last parameter unset (see the
+	 * comments on the function).
+	 */
 	if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1))
-		__bfq_entity_update_weight_prio(
-			bfq_entity_service_tree(entity),
-			entity);
+		__bfq_entity_update_weight_prio(bfq_entity_service_tree(entity),
+						entity, false);
 }
 
 /*

diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 5c3bf98..8fd83b8 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h

@@ -892,7 +892,8 @@ void bfq_put_idle_entity(struct bfq_service_tree *st,
 			 struct bfq_entity *entity);
 struct bfq_service_tree *
 __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
-				struct bfq_entity *entity);
+				struct bfq_entity *entity,
+				bool update_class_too);
 void bfq_bfqq_served(struct bfq_queue *bfqq, int served);
 void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 			  unsigned long time_ms);

diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 8726ede..5ec05cd 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c

@@ -694,10 +694,28 @@ struct bfq_service_tree *bfq_entity_service_tree(struct bfq_entity *entity)
 	return sched_data->service_tree + idx;
 }
 
-
+/*
+ * Update weight and priority of entity. If update_class_too is true,
+ * then update the ioprio_class of entity too.
+ *
+ * The reason why the update of ioprio_class is controlled through the
+ * last parameter is as follows. Changing the ioprio class of an
+ * entity implies changing the destination service trees for that
+ * entity. If such a change occurred when the entity is already on one
+ * of the service trees for its previous class, then the state of the
+ * entity would become more complex: none of the new possible service
+ * trees for the entity, according to bfq_entity_service_tree(), would
+ * match any of the possible service trees on which the entity
+ * is. Complex operations involving these trees, such as entity
+ * activations and deactivations, should take into account this
+ * additional complexity.  To avoid this issue, this function is
+ * invoked with update_class_too unset at the points in the code where
+ * the entity may happen to be on some tree.
+ */
 struct bfq_service_tree *
 __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
-				struct bfq_entity *entity)
+				struct bfq_entity *entity,
+				bool update_class_too)
 {
 	struct bfq_service_tree *new_st = old_st;
 
@@ -739,9 +757,15 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
 				  bfq_weight_to_ioprio(entity->orig_weight);
 		}
 
-		if (bfqq)
+		if (bfqq && update_class_too)
 			bfqq->ioprio_class = bfqq->new_ioprio_class;
-		entity->prio_changed = 0;
+
+		/*
+		 * Reset prio_changed only if the ioprio_class change
+		 * is not pending any longer.
+		 */
+		if (!bfqq || bfqq->ioprio_class == bfqq->new_ioprio_class)
+			entity->prio_changed = 0;
 
 		/*
 		 * NOTE: here we may be changing the weight too early,
@@ -867,7 +891,12 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
 {
 	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
 
-	st = __bfq_entity_update_weight_prio(st, entity);
+	/*
+	 * When this function is invoked, entity is not in any service
+	 * tree, so it is safe to invoke the next function with the
+	 * last parameter set (see the comments on the function).
+	 */
+	st = __bfq_entity_update_weight_prio(st, entity, true);
 	bfq_calc_finish(entity, entity->budget);
 
 	/*

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index b8a3a65..83e92be 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c

@@ -102,7 +102,7 @@ EXPORT_SYMBOL(bio_integrity_alloc);
  * Description: Used to free the integrity portion of a bio. Usually
  * called from bio_free().
  */
-void bio_integrity_free(struct bio *bio)
+static void bio_integrity_free(struct bio *bio)
 {
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct bio_set *bs = bio->bi_pool;
@@ -120,8 +120,8 @@ void bio_integrity_free(struct bio *bio)
 	}
 
 	bio->bi_integrity = NULL;
+	bio->bi_opf &= ~REQ_INTEGRITY;
 }
-EXPORT_SYMBOL(bio_integrity_free);
 
 /**
  * bio_integrity_add_page - Attach integrity metadata
@@ -160,44 +160,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
 EXPORT_SYMBOL(bio_integrity_add_page);
 
 /**
- * bio_integrity_enabled - Check whether integrity can be passed
- * @bio:	bio to check
- *
- * Description: Determines whether bio_integrity_prep() can be called
- * on this bio or not.	bio data direction and target device must be
- * set prior to calling.  The functions honors the write_generate and
- * read_verify flags in sysfs.
- */
-bool bio_integrity_enabled(struct bio *bio)
-{
-	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
-
-	if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
-		return false;
-
-	if (!bio_sectors(bio))
-		return false;
-
-	/* Already protected? */
-	if (bio_integrity(bio))
-		return false;
-
-	if (bi == NULL)
-		return false;
-
-	if (bio_data_dir(bio) == READ && bi->profile->verify_fn != NULL &&
-	    (bi->flags & BLK_INTEGRITY_VERIFY))
-		return true;
-
-	if (bio_data_dir(bio) == WRITE && bi->profile->generate_fn != NULL &&
-	    (bi->flags & BLK_INTEGRITY_GENERATE))
-		return true;
-
-	return false;
-}
-EXPORT_SYMBOL(bio_integrity_enabled);
-
-/**
  * bio_integrity_intervals - Return number of integrity intervals for a bio
  * @bi:		blk_integrity profile for device
  * @sectors:	Size of the bio in 512-byte sectors
@@ -222,10 +184,11 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
 /**
  * bio_integrity_process - Process integrity metadata for a bio
  * @bio:	bio to generate/verify integrity metadata for
+ * @proc_iter:  iterator to process
  * @proc_fn:	Pointer to the relevant processing function
  */
 static blk_status_t bio_integrity_process(struct bio *bio,
-				 integrity_processing_fn *proc_fn)
+		struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn)
 {
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 	struct blk_integrity_iter iter;
@@ -238,10 +201,10 @@ static blk_status_t bio_integrity_process(struct bio *bio,
 
 	iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
 	iter.interval = 1 << bi->interval_exp;
-	iter.seed = bip_get_seed(bip);
+	iter.seed = proc_iter->bi_sector;
 	iter.prot_buf = prot_buf;
 
-	bio_for_each_segment(bv, bio, bviter) {
+	__bio_for_each_segment(bv, bio, bviter, *proc_iter) {
 		void *kaddr = kmap_atomic(bv.bv_page);
 
 		iter.data_buf = kaddr + bv.bv_offset;
@@ -262,14 +225,15 @@ static blk_status_t bio_integrity_process(struct bio *bio,
  * bio_integrity_prep - Prepare bio for integrity I/O
  * @bio:	bio to prepare
  *
- * Description: Allocates a buffer for integrity metadata, maps the
- * pages and attaches them to a bio.  The bio must have data
- * direction, target device and start sector set priot to calling.  In
- * the WRITE case, integrity metadata will be generated using the
- * block device's integrity function.  In the READ case, the buffer
+ * Description:  Checks if the bio already has an integrity payload attached.
+ * If it does, the payload has been generated by another kernel subsystem,
+ * and we just pass it through. Otherwise allocates integrity payload.
+ * The bio must have data direction, target device and start sector set prior
+ * to calling.  In the WRITE case, integrity metadata will be generated using
+ * the block device's integrity function.  In the READ case, the buffer
  * will be prepared for DMA and a suitable end_io handler set up.
  */
-int bio_integrity_prep(struct bio *bio)
+bool bio_integrity_prep(struct bio *bio)
 {
 	struct bio_integrity_payload *bip;
 	struct blk_integrity *bi;
@@ -279,20 +243,41 @@ int bio_integrity_prep(struct bio *bio)
 	unsigned int len, nr_pages;
 	unsigned int bytes, offset, i;
 	unsigned int intervals;
+	blk_status_t status;
 
 	bi = bdev_get_integrity(bio->bi_bdev);
 	q = bdev_get_queue(bio->bi_bdev);
-	BUG_ON(bi == NULL);
-	BUG_ON(bio_integrity(bio));
+	if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
+		return true;
 
+	if (!bio_sectors(bio))
+		return true;
+
+	/* Already protected? */
+	if (bio_integrity(bio))
+		return true;
+
+	if (bi == NULL)
+		return true;
+
+	if (bio_data_dir(bio) == READ) {
+		if (!bi->profile->verify_fn ||
+		    !(bi->flags & BLK_INTEGRITY_VERIFY))
+			return true;
+	} else {
+		if (!bi->profile->generate_fn ||
+		    !(bi->flags & BLK_INTEGRITY_GENERATE))
+			return true;
+	}
 	intervals = bio_integrity_intervals(bi, bio_sectors(bio));
 
 	/* Allocate kernel buffer for protection data */
 	len = intervals * bi->tuple_size;
 	buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
+	status = BLK_STS_RESOURCE;
 	if (unlikely(buf == NULL)) {
 		printk(KERN_ERR "could not allocate integrity buffer\n");
-		return -ENOMEM;
+		goto err_end_io;
 	}
 
 	end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -304,7 +289,8 @@ int bio_integrity_prep(struct bio *bio)
 	if (IS_ERR(bip)) {
 		printk(KERN_ERR "could not allocate data integrity bioset\n");
 		kfree(buf);
-		return PTR_ERR(bip);
+		status = BLK_STS_RESOURCE;
+		goto err_end_io;
 	}
 
 	bip->bip_flags |= BIP_BLOCK_INTEGRITY;
@@ -330,7 +316,7 @@ int bio_integrity_prep(struct bio *bio)
 					     bytes, offset);
 
 		if (ret == 0)
-			return 0;
+			return false;
 
 		if (ret < bytes)
 			break;
@@ -340,17 +326,18 @@ int bio_integrity_prep(struct bio *bio)
 		offset = 0;
 	}
 
-	/* Install custom I/O completion handler if read verify is enabled */
-	if (bio_data_dir(bio) == READ) {
-		bip->bip_end_io = bio->bi_end_io;
-		bio->bi_end_io = bio_integrity_endio;
-	}
-
 	/* Auto-generate integrity metadata if this is a write */
-	if (bio_data_dir(bio) == WRITE)
-		bio_integrity_process(bio, bi->profile->generate_fn);
+	if (bio_data_dir(bio) == WRITE) {
+		bio_integrity_process(bio, &bio->bi_iter,
+				      bi->profile->generate_fn);
+	}
+	return true;
 
-	return 0;
+err_end_io:
+	bio->bi_status = status;
+	bio_endio(bio);
+	return false;
+
 }
 EXPORT_SYMBOL(bio_integrity_prep);
 
@@ -368,16 +355,26 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 		container_of(work, struct bio_integrity_payload, bip_work);
 	struct bio *bio = bip->bip_bio;
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+	struct bvec_iter iter = bio->bi_iter;
 
-	bio->bi_status = bio_integrity_process(bio, bi->profile->verify_fn);
+	/*
+	 * By the time verify is called, the bio's iterator has been
+	 * advanced during split and completion, so we need to rewind
+	 * the iterator to its original position.
+	 */
+	if (bio_rewind_iter(bio, &iter, iter.bi_done)) {
+		bio->bi_status = bio_integrity_process(bio, &iter,
+						       bi->profile->verify_fn);
+	} else {
+		bio->bi_status = BLK_STS_IOERR;
+	}
 
-	/* Restore original bio completion handler */
-	bio->bi_end_io = bip->bip_end_io;
+	bio_integrity_free(bio);
 	bio_endio(bio);
 }
 
 /**
- * bio_integrity_endio - Integrity I/O completion function
+ * __bio_integrity_endio - Integrity I/O completion function
  * @bio:	Protected bio
  * @error:	Pointer to errno
  *
@@ -388,27 +385,19 @@ static void bio_integrity_verify_fn(struct work_struct *work)
  * in process context.	This function postpones completion
  * accordingly.
  */
-void bio_integrity_endio(struct bio *bio)
+bool __bio_integrity_endio(struct bio *bio)
 {
-	struct bio_integrity_payload *bip = bio_integrity(bio);
+	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
 
-	BUG_ON(bip->bip_bio != bio);
-
-	/* In case of an I/O error there is no point in verifying the
-	 * integrity metadata.  Restore original bio end_io handler
-	 * and run it.
-	 */
-	if (bio->bi_status) {
-		bio->bi_end_io = bip->bip_end_io;
-		bio_endio(bio);
-
-		return;
+		INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
+		queue_work(kintegrityd_wq, &bip->bip_work);
+		return false;
 	}
 
-	INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
-	queue_work(kintegrityd_wq, &bip->bip_work);
+	bio_integrity_free(bio);
+	return true;
 }
-EXPORT_SYMBOL(bio_integrity_endio);
 
 /**
  * bio_integrity_advance - Advance integrity vector
@@ -425,6 +414,7 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 	unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
 
+	bip->bip_iter.bi_sector += bytes_done >> 9;
 	bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
 }
 EXPORT_SYMBOL(bio_integrity_advance);
@@ -432,22 +422,15 @@ EXPORT_SYMBOL(bio_integrity_advance);
 /**
  * bio_integrity_trim - Trim integrity vector
  * @bio:	bio whose integrity vector to update
- * @offset:	offset to first data sector
- * @sectors:	number of data sectors
  *
  * Description: Used to trim the integrity vector in a cloned bio.
- * The ivec will be advanced corresponding to 'offset' data sectors
- * and the length will be truncated corresponding to 'len' data
- * sectors.
  */
-void bio_integrity_trim(struct bio *bio, unsigned int offset,
-			unsigned int sectors)
+void bio_integrity_trim(struct bio *bio)
 {
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 
-	bio_integrity_advance(bio, offset << 9);
-	bip->bip_iter.bi_size = bio_integrity_bytes(bi, sectors);
+	bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
 }
 EXPORT_SYMBOL(bio_integrity_trim);
 

diff --git a/block/bio.c b/block/bio.c
index 1cfcd0d..9a63597 100644
--- a/block/bio.c
+++ b/block/bio.c

@@ -243,9 +243,6 @@ struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
 void bio_uninit(struct bio *bio)
 {
 	bio_disassociate_task(bio);
-
-	if (bio_integrity(bio))
-		bio_integrity_free(bio);
 }
 EXPORT_SYMBOL(bio_uninit);
 
@@ -1813,6 +1810,8 @@ void bio_endio(struct bio *bio)
 again:
 	if (!bio_remaining_done(bio))
 		return;
+	if (!bio_integrity_endio(bio))
+		return;
 
 	/*
 	 * Need to have a real endio function for chained bios, otherwise
@@ -1834,6 +1833,8 @@ void bio_endio(struct bio *bio)
 	}
 
 	blk_throtl_bio_endio(bio);
+	/* release cgroup info */
+	bio_uninit(bio);
 	if (bio->bi_end_io)
 		bio->bi_end_io(bio);
 }
@@ -1868,7 +1869,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
 	split->bi_iter.bi_size = sectors << 9;
 
 	if (bio_integrity(split))
-		bio_integrity_trim(split, 0, sectors);
+		bio_integrity_trim(split);
 
 	bio_advance(bio, split->bi_iter.bi_size);
 
@@ -1900,6 +1901,10 @@ void bio_trim(struct bio *bio, int offset, int size)
 	bio_advance(bio, offset << 9);
 
 	bio->bi_iter.bi_size = size;
+
+	if (bio_integrity(bio))
+		bio_integrity_trim(bio);
+
 }
 EXPORT_SYMBOL_GPL(bio_trim);
 

diff --git a/block/blk-core.c b/block/blk-core.c
index af393d5..970b9c96 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c

@@ -1787,11 +1787,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 
 	blk_queue_split(q, &bio);
 
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_status = BLK_STS_IOERR;
-		bio_endio(bio);
+	if (!bio_integrity_prep(bio))
 		return BLK_QC_T_NONE;
-	}
 
 	if (op_is_flush(bio->bi_opf)) {
 		spin_lock_irq(q->queue_lock);

diff --git a/block/blk-lib.c b/block/blk-lib.c
index e8caecd7..3fe0aec 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c

@@ -261,6 +261,19 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
 	return 0;
 }
 
+/*
+ * Convert a number of 512B sectors to a number of pages, rounding up so that
+ * a partial page still counts as one page, and cap the result at the number
+ * of pages that can fit into a BIO.  The rounding is done in sector units:
+ * a byte count ("nr_sects << 9") can wrap sector_t for very large ranges.
+ */
+static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
+{
+	sector_t pages = (nr_sects + (PAGE_SIZE >> 9) - 1) >> (PAGE_SHIFT - 9);
+
+	return min(pages, (sector_t)BIO_MAX_PAGES);
+}
+
 /**
  * __blkdev_issue_zeroout - generate number of zero filed write bios
  * @bdev:	blockdev to issue
@@ -307,18 +320,18 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 
 	ret = 0;
 	while (nr_sects != 0) {
-		bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES),
-				gfp_mask);
+		bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
+			       gfp_mask);
 		bio->bi_iter.bi_sector = sector;
 		bio->bi_bdev   = bdev;
 		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
 		while (nr_sects != 0) {
-			sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
-			bi_size = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
+			sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
+			bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
 			nr_sects -= bi_size >> 9;
 			sector += bi_size >> 9;
-			if (bi_size < (sz << 9))
+			if (bi_size < sz)
 				break;
 		}
 		cond_resched();

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 7f0dc48..4ab6943 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c

@@ -515,10 +515,12 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 	}
 
 	/*
-	 * Default to 256, since we don't split into sync/async like the
-	 * old code did. Additionally, this is a per-hw queue depth.
+	 * Default to twice the smaller of hw queue_depth and 128,
+	 * since we don't split into sync/async like the old code did.
+	 * Additionally, this is a per-hw queue depth.
 	 */
-	q->nr_requests = 2 * BLKDEV_MAX_RQ;
+	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
+				   BLKDEV_MAX_RQ);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		ret = blk_mq_sched_alloc_tags(q, hctx, i);

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6cef42f..041f7b7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c

@@ -1547,10 +1547,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_split(q, &bio);
 
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio_io_error(bio);
+	if (!bio_integrity_prep(bio))
 		return BLK_QC_T_NONE;
-	}
 
 	if (!is_flush_fua && !blk_queue_nomerges(q) &&
 	    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))

diff --git a/block/blk.h b/block/blk.h
index 01ebb81..3a3d715 100644
--- a/block/blk.h
+++ b/block/blk.h

@@ -81,10 +81,21 @@ static inline void blk_queue_enter_live(struct request_queue *q)
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 void blk_flush_integrity(void);
+bool __bio_integrity_endio(struct bio *);
+/* Call __bio_integrity_endio() only for bios with bio_integrity() set. */
+static inline bool bio_integrity_endio(struct bio *bio)
+{
+	return !bio_integrity(bio) ||
+	       __bio_integrity_endio(bio);
+}
 #else
 static inline void blk_flush_integrity(void)
 {
 }
+static inline bool bio_integrity_endio(struct bio *bio)
+{
+	return true;	/* !CONFIG_BLK_DEV_INTEGRITY stub */
+}
 #endif
 
 void blk_timeout_work(struct work_struct *work);

diff --git a/block/t10-pi.c b/block/t10-pi.c
index 3416dad..a98db38 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c

@@ -28,9 +28,6 @@
 
 typedef __be16 (csum_fn) (void *, unsigned int);
 
-static const __be16 APP_ESCAPE = (__force __be16) 0xffff;
-static const __be32 REF_ESCAPE = (__force __be32) 0xffffffff;
-
 static __be16 t10_pi_crc_fn(void *data, unsigned int len)
 {
 	return cpu_to_be16(crc_t10dif(data, len));
@@ -82,7 +79,7 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
 		switch (type) {
 		case 1:
 		case 2:
-			if (pi->app_tag == APP_ESCAPE)
+			if (pi->app_tag == T10_PI_APP_ESCAPE)
 				goto next;
 
 			if (be32_to_cpu(pi->ref_tag) !=
@@ -95,8 +92,8 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
 			}
 			break;
 		case 3:
-			if (pi->app_tag == APP_ESCAPE &&
-			    pi->ref_tag == REF_ESCAPE)
+			if (pi->app_tag == T10_PI_APP_ESCAPE &&
+			    pi->ref_tag == T10_PI_REF_ESCAPE)
 				goto next;
 			break;
 		}

diff --git a/certs/Makefile b/certs/Makefile
index 4119bb3..847361c 100644
--- a/certs/Makefile
+++ b/certs/Makefile

@@ -26,7 +26,7 @@
 targets += x509_certificate_list
 $(obj)/x509_certificate_list: scripts/extract-cert $(SYSTEM_TRUSTED_KEYS_SRCPREFIX)$(SYSTEM_TRUSTED_KEYS_FILENAME) FORCE
 	$(call if_changed,extract_certs,$(SYSTEM_TRUSTED_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_TRUSTED_KEYS))
-endif
+endif # CONFIG_SYSTEM_TRUSTED_KEYRING
 
 clean-files := x509_certificate_list .x509.list
 
@@ -87,7 +87,7 @@
 	@echo >>$@ "keyUsage=digitalSignature"
 	@echo >>$@ "subjectKeyIdentifier=hash"
 	@echo >>$@ "authorityKeyIdentifier=keyid"
-endif
+endif # CONFIG_MODULE_SIG_KEY
 
 $(eval $(call config_filename,MODULE_SIG_KEY))
 
@@ -102,4 +102,4 @@
 targets += signing_key.x509
 $(obj)/signing_key.x509: scripts/extract-cert $(X509_DEP) FORCE
 	$(call if_changed,extract_certs,$(MODULE_SIG_KEY_SRCPREFIX)$(CONFIG_MODULE_SIG_KEY))
-endif
+endif # CONFIG_MODULE_SIG

diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 10347e3..e51a1e9 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c

@@ -576,7 +576,7 @@ static struct attribute *lpss_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group lpss_attr_group = {
+static const struct attribute_group lpss_attr_group = {
 	.attrs = lpss_attrs,
 	.name = "lpss_ltr",
 };

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index d048f72..a3215ee 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c

@@ -31,6 +31,11 @@
 #define IORT_IOMMU_TYPE		((1 << ACPI_IORT_NODE_SMMU) |	\
 				(1 << ACPI_IORT_NODE_SMMU_V3))
 
+/* Until ACPICA headers cover IORT rev. C */
+#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX
+#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX		0x2
+#endif
+
 struct iort_its_msi_chip {
 	struct list_head	list;
 	struct fwnode_handle	*fw_node;
@@ -819,6 +824,36 @@ static int __init arm_smmu_v3_count_resources(struct acpi_iort_node *node)
 	return num_res;
 }
 
+static bool arm_smmu_v3_is_combined_irq(struct acpi_iort_smmu_v3 *smmu)
+{
+	/*
+	 * Only the Cavium ThunderX2 implementation lacks unique irq lines
+	 * and uses a single irq line for all the SMMUv3 interrupts.
+	 */
+	if (smmu->model != ACPI_IORT_SMMU_V3_CAVIUM_CN99XX)
+		return false;
+
+	/*
+	 * ThunderX2 doesn't support MSIs from the SMMU, so the SPI numbers
+	 * are compared: all four must name the same line.
+	 */
+	return smmu->pri_gsiv == smmu->event_gsiv &&
+	       smmu->gerr_gsiv == smmu->event_gsiv &&
+	       smmu->sync_gsiv == smmu->event_gsiv;
+}
+
+static unsigned long arm_smmu_v3_resource_size(struct acpi_iort_smmu_v3 *smmu)
+{
+	bool cn99xx = smmu->model == ACPI_IORT_SMMU_V3_CAVIUM_CN99XX;
+
+	/*
+	 * The Cavium ThunderX2 implementation doesn't support the page 1
+	 * SMMU register space, so its resource is overridden to 64K
+	 * instead of the regular 128K.
+	 */
+	return cn99xx ? SZ_64K : SZ_128K;
+}
+
 static void __init arm_smmu_v3_init_resources(struct resource *res,
 					      struct acpi_iort_node *node)
 {
@@ -829,30 +864,38 @@ static void __init arm_smmu_v3_init_resources(struct resource *res,
 	smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
 
 	res[num_res].start = smmu->base_address;
-	res[num_res].end = smmu->base_address + SZ_128K - 1;
+	res[num_res].end = smmu->base_address +
+				arm_smmu_v3_resource_size(smmu) - 1;
 	res[num_res].flags = IORESOURCE_MEM;
 
 	num_res++;
+	if (arm_smmu_v3_is_combined_irq(smmu)) {
+		if (smmu->event_gsiv)
+			acpi_iort_register_irq(smmu->event_gsiv, "combined",
+					       ACPI_EDGE_SENSITIVE,
+					       &res[num_res++]);
+	} else {
 
-	if (smmu->event_gsiv)
-		acpi_iort_register_irq(smmu->event_gsiv, "eventq",
-				       ACPI_EDGE_SENSITIVE,
-				       &res[num_res++]);
+		if (smmu->event_gsiv)
+			acpi_iort_register_irq(smmu->event_gsiv, "eventq",
+					       ACPI_EDGE_SENSITIVE,
+					       &res[num_res++]);
 
-	if (smmu->pri_gsiv)
-		acpi_iort_register_irq(smmu->pri_gsiv, "priq",
-				       ACPI_EDGE_SENSITIVE,
-				       &res[num_res++]);
+		if (smmu->pri_gsiv)
+			acpi_iort_register_irq(smmu->pri_gsiv, "priq",
+					       ACPI_EDGE_SENSITIVE,
+					       &res[num_res++]);
 
-	if (smmu->gerr_gsiv)
-		acpi_iort_register_irq(smmu->gerr_gsiv, "gerror",
-				       ACPI_EDGE_SENSITIVE,
-				       &res[num_res++]);
+		if (smmu->gerr_gsiv)
+			acpi_iort_register_irq(smmu->gerr_gsiv, "gerror",
+					       ACPI_EDGE_SENSITIVE,
+					       &res[num_res++]);
 
-	if (smmu->sync_gsiv)
-		acpi_iort_register_irq(smmu->sync_gsiv, "cmdq-sync",
-				       ACPI_EDGE_SENSITIVE,
-				       &res[num_res++]);
+		if (smmu->sync_gsiv)
+			acpi_iort_register_irq(smmu->sync_gsiv, "cmdq-sync",
+					       ACPI_EDGE_SENSITIVE,
+					       &res[num_res++]);
+	}
 }
 
 static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node)

diff --git a/drivers/acpi/bgrt.c b/drivers/acpi/bgrt.c
index df1c629..75af783 100644
--- a/drivers/acpi/bgrt.c
+++ b/drivers/acpi/bgrt.c

@@ -76,7 +76,7 @@ static struct bin_attribute *bgrt_bin_attributes[] = {
 	NULL,
 };
 
-static struct attribute_group bgrt_attribute_group = {
+static const struct attribute_group bgrt_attribute_group = {
 	.attrs = bgrt_attributes,
 	.bin_attrs = bgrt_bin_attributes,
 };

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 5a6fbe0..af74b42 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c

@@ -409,11 +409,15 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
 	    (driver->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS))
 		driver->ops.notify(adev, type);
 
-	if (hotplug_event && ACPI_SUCCESS(acpi_hotplug_schedule(adev, type)))
+	if (!hotplug_event) {
+		acpi_bus_put_acpi_device(adev);
+		return;
+	}
+
+	if (ACPI_SUCCESS(acpi_hotplug_schedule(adev, type)))
 		return;
 
 	acpi_bus_put_acpi_device(adev);
-	return;
 
  err:
 	acpi_evaluate_ost(handle, type, ost_code, NULL);

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 28938b5..2ed6935 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c

@@ -262,8 +262,10 @@ int acpi_bus_init_power(struct acpi_device *device)
 		return -EINVAL;
 
 	device->power.state = ACPI_STATE_UNKNOWN;
-	if (!acpi_device_is_present(device))
+	if (!acpi_device_is_present(device)) {
+		device->flags.initialized = false;
 		return -ENXIO;
+	}
 
 	result = acpi_device_get_power(device, &state);
 	if (result)

diff --git a/drivers/acpi/dptf/dptf_power.c b/drivers/acpi/dptf/dptf_power.c
index 734642d..e1c2425 100644
--- a/drivers/acpi/dptf/dptf_power.c
+++ b/drivers/acpi/dptf/dptf_power.c

@@ -65,7 +65,7 @@ static struct attribute *dptf_power_attrs[] = {
 	NULL
 };
 
-static struct attribute_group dptf_power_attribute_group = {
+static const struct attribute_group dptf_power_attribute_group = {
 	.attrs = dptf_power_attrs,
 	.name = "dptf_power"
 };

diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index be79f7d..9531d32 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h

@@ -111,7 +111,7 @@ int acpi_device_setup_files(struct acpi_device *dev);
 void acpi_device_remove_files(struct acpi_device *dev);
 void acpi_device_add_finalize(struct acpi_device *device);
 void acpi_free_pnp_ids(struct acpi_device_pnp *pnp);
-bool acpi_device_is_present(struct acpi_device *adev);
+bool acpi_device_is_present(const struct acpi_device *adev);
 bool acpi_device_is_battery(struct acpi_device *adev);
 bool acpi_device_is_first_physical_node(struct acpi_device *adev,
 					const struct device *dev);

diff --git a/drivers/acpi/osi.c b/drivers/acpi/osi.c
index 849f9d2..723bee5 100644
--- a/drivers/acpi/osi.c
+++ b/drivers/acpi/osi.c

@@ -265,7 +265,8 @@ static void __init acpi_osi_dmi_darwin(bool enable,
 	__acpi_osi_setup_darwin(enable);
 }
 
-void __init acpi_osi_dmi_linux(bool enable, const struct dmi_system_id *d)
+static void __init acpi_osi_dmi_linux(bool enable,
+				      const struct dmi_system_id *d)
 {
 	pr_notice("DMI detected to setup _OSI(\"Linux\"): %s\n", d->ident);
 	osi_config.linux_dmi = 1;

diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 3a6c9b7..1b475bc 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c

@@ -352,7 +352,7 @@ static struct attribute *attrs[] = {
 	NULL,
 };
 
-static struct attribute_group attr_groups[] = {
+static const struct attribute_group attr_groups[] = {
 	[ACPI_STATE_D0] = {
 		.name = "power_resources_D0",
 		.attrs = attrs,
@@ -371,14 +371,14 @@ static struct attribute_group attr_groups[] = {
 	},
 };
 
-static struct attribute_group wakeup_attr_group = {
+static const struct attribute_group wakeup_attr_group = {
 	.name = "power_resources_wakeup",
 	.attrs = attrs,
 };
 
 static void acpi_power_hide_list(struct acpi_device *adev,
 				 struct list_head *resources,
-				 struct attribute_group *attr_group)
+				 const struct attribute_group *attr_group)
 {
 	struct acpi_power_resource_entry *entry;
 
@@ -397,7 +397,7 @@ static void acpi_power_hide_list(struct acpi_device *adev,
 
 static void acpi_power_expose_list(struct acpi_device *adev,
 				   struct list_head *resources,
-				   struct attribute_group *attr_group)
+				   const struct attribute_group *attr_group)
 {
 	struct acpi_power_resource_entry *entry;
 	int ret;
@@ -425,7 +425,7 @@ static void acpi_power_expose_list(struct acpi_device *adev,
 
 static void acpi_power_expose_hide(struct acpi_device *adev,
 				   struct list_head *resources,
-				   struct attribute_group *attr_group,
+				   const struct attribute_group *attr_group,
 				   bool expose)
 {
 	if (expose)

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 9364398..917c789 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c

@@ -57,6 +57,7 @@ static bool acpi_nondev_subnode_extract(const union acpi_object *desc,
 
 	dn->name = link->package.elements[0].string.pointer;
 	dn->fwnode.type = FWNODE_ACPI_DATA;
+	dn->fwnode.ops = &acpi_fwnode_ops;
 	dn->parent = parent;
 	INIT_LIST_HEAD(&dn->data.subnodes);
 
@@ -1119,3 +1120,119 @@ int acpi_graph_get_remote_endpoint(struct fwnode_handle *fwnode,
 
 	return 0;
 }
+
+static bool acpi_fwnode_device_is_available(struct fwnode_handle *fwnode)
+{
+	/* Anything other than an ACPI device node is never available. */
+	bool is_dev = is_acpi_device_node(fwnode);
+
+	return is_dev && acpi_device_is_present(to_acpi_device_node(fwnode));
+}
+
+static bool acpi_fwnode_property_present(struct fwnode_handle *fwnode,
+					 const char *propname)
+{
+	return acpi_node_prop_get(fwnode, propname, NULL) == 0; /* 0: found */
+}
+
+/*
+ * .property_read_int_array op for ACPI nodes: map @elem_size onto the
+ * DEV_PROP_U8..U64 type of the same width and forward the request to
+ * acpi_node_prop_read(). Widths matching none of them yield -ENXIO.
+ */
+static int acpi_fwnode_property_read_int_array(struct fwnode_handle *fwnode,
+					       const char *propname,
+					       unsigned int elem_size,
+					       void *val, size_t nval)
+{
+	enum dev_prop_type proptype;
+
+	/* Translate the element width into the matching property type. */
+	if (elem_size == sizeof(u8))
+		proptype = DEV_PROP_U8;
+	else if (elem_size == sizeof(u16))
+		proptype = DEV_PROP_U16;
+	else if (elem_size == sizeof(u32))
+		proptype = DEV_PROP_U32;
+	else if (elem_size == sizeof(u64))
+		proptype = DEV_PROP_U64;
+	else
+		return -ENXIO;
+
+	return acpi_node_prop_read(fwnode, propname, proptype, val, nval);
+}
+
+static int acpi_fwnode_property_read_string_array(struct fwnode_handle *fwnode,
+						  const char *propname,
+						  const char **val, size_t nval)
+{
+	return acpi_node_prop_read(fwnode, propname, DEV_PROP_STRING,
+				   val, nval);	/* ACPI string-array read op */
+}
+
+static struct fwnode_handle *
+acpi_fwnode_get_named_child_node(struct fwnode_handle *fwnode,
+				 const char *childname)
+{
+	struct fwnode_handle *node;
+
+	/*
+	 * Walk the children and hand back the first one whose name matches;
+	 * for ACPI such a child is a data-only sub-node.
+	 */
+	fwnode_for_each_child_node(fwnode, node) {
+		if (acpi_data_node_match(node, childname))
+			return node;
+	}
+	return NULL;
+}
+
+static struct fwnode_handle *
+acpi_fwnode_graph_get_next_endpoint(struct fwnode_handle *fwnode,
+				    struct fwnode_handle *prev)
+{
+	struct fwnode_handle *next =
+		acpi_graph_get_next_endpoint(fwnode, prev);
+
+	/*
+	 * An error pointer from acpi_graph_get_next_endpoint() becomes NULL.
+	 */
+	return IS_ERR(next) ? NULL : next;
+}
+
+static struct fwnode_handle *
+acpi_fwnode_graph_get_remote_endpoint(struct fwnode_handle *fwnode)
+{
+	struct fwnode_handle *remote = NULL;
+
+	/* Result code ignored; 'remote' presumably stays NULL on failure. */
+	acpi_graph_get_remote_endpoint(fwnode, NULL, NULL, &remote);
+	return remote;
+}
+
+static int acpi_fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
+					    struct fwnode_endpoint *endpoint)
+{
+	struct fwnode_handle *parent = fwnode_get_parent(fwnode);
+
+	/* Read results are ignored; the function always reports success. */
+	fwnode_property_read_u32(parent, "port", &endpoint->port);
+	fwnode_property_read_u32(fwnode, "endpoint", &endpoint->id);
+	endpoint->local_fwnode = fwnode;
+
+	return 0;
+}
+
+const struct fwnode_operations acpi_fwnode_ops = {
+	.device_is_available = acpi_fwnode_device_is_available,
+	.property_present = acpi_fwnode_property_present,
+	.property_read_int_array = acpi_fwnode_property_read_int_array,
+	.property_read_string_array = acpi_fwnode_property_read_string_array,
+	.get_parent = acpi_node_get_parent,
+	.get_next_child_node = acpi_get_next_subnode,
+	.get_named_child_node = acpi_fwnode_get_named_child_node,
+	.graph_get_next_endpoint = acpi_fwnode_graph_get_next_endpoint,
+	.graph_get_remote_endpoint = acpi_fwnode_graph_get_remote_endpoint,
+	.graph_get_port_parent = acpi_node_get_parent, /* as .get_parent */
+	.graph_parse_endpoint = acpi_fwnode_graph_parse_endpoint,
+};

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 09f65f5..3389729 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c

@@ -404,10 +404,6 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src)
 		error = dock_notify(adev, src);
 	} else if (adev->flags.hotplug_notify) {
 		error = acpi_generic_hotplug_event(adev, src);
-		if (error == -EPERM) {
-			ost_code = ACPI_OST_SC_EJECT_NOT_SUPPORTED;
-			goto err_out;
-		}
 	} else {
 		int (*notify)(struct acpi_device *, u32);
 
@@ -423,8 +419,20 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src)
 		else
 			goto out;
 	}
-	if (!error)
+	switch (error) {
+	case 0:
 		ost_code = ACPI_OST_SC_SUCCESS;
+		break;
+	case -EPERM:
+		ost_code = ACPI_OST_SC_EJECT_NOT_SUPPORTED;
+		break;
+	case -EBUSY:
+		ost_code = ACPI_OST_SC_DEVICE_BUSY;
+		break;
+	default:
+		ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
+		break;
+	}
 
  err_out:
 	acpi_evaluate_ost(adev->handle, src, ost_code, NULL);
@@ -1460,6 +1468,7 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
 	device->handle = handle;
 	device->parent = acpi_bus_get_parent(handle);
 	device->fwnode.type = FWNODE_ACPI;
+	device->fwnode.ops = &acpi_fwnode_ops;
 	acpi_set_device_status(device, sta);
 	acpi_device_get_busid(device);
 	acpi_set_pnp_ids(handle, &device->pnp, type);
@@ -1592,13 +1601,9 @@ static int acpi_bus_type_and_status(acpi_handle handle, int *type,
 	return 0;
 }
 
-bool acpi_device_is_present(struct acpi_device *adev)
+bool acpi_device_is_present(const struct acpi_device *adev)
 {
-	if (adev->status.present || adev->status.functional)
-		return true;
-
-	adev->flags.initialized = false;
-	return false;
+	return adev->status.present || adev->status.functional;
 }
 
 static bool acpi_scan_handler_matching(struct acpi_scan_handler *handler,
@@ -1831,6 +1836,7 @@ static void acpi_bus_attach(struct acpi_device *device)
 	acpi_bus_get_status(device);
 	/* Skip devices that are not present. */
 	if (!acpi_device_is_present(device)) {
+		device->flags.initialized = false;
 		acpi_device_clear_enumerated(device);
 		device->flags.power_manageable = 0;
 		return;

diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c
index 3afa8c1..4ac3e06 100644
--- a/drivers/acpi/spcr.c
+++ b/drivers/acpi/spcr.c

@@ -36,6 +36,26 @@ static bool qdf2400_erratum_44_present(struct acpi_table_header *h)
 	return false;
 }
 
+/*
+ * APM X-Gene v1 and v2 UART hardware is a 16550-like device but has its
+ * registers aligned to 32 bits. In addition, the BIOS encodes the access
+ * width as 8 bits. This function detects this erratum condition.
+ */
+static bool xgene_8250_erratum_present(struct acpi_table_spcr *tb)
+{
+	struct acpi_table_header *hdr = &tb->header;
+
+	if (tb->interface_type != ACPI_DBG2_16550_COMPATIBLE)
+		return false;
+
+	if (memcmp(hdr->oem_id, "APMC0D", ACPI_OEM_ID_SIZE))
+		return false;
+
+	/* Matching OEM: flag the table ID below, but only at revision 0. */
+	return hdr->oem_revision == 0 &&
+	       !memcmp(hdr->oem_table_id, "XGENESPC", ACPI_OEM_TABLE_ID_SIZE);
+}
+
 /**
  * parse_spcr() - parse ACPI SPCR table and add preferred console
  *
@@ -74,8 +94,22 @@ int __init parse_spcr(bool earlycon)
 		goto done;
 	}
 
-	iotype = table->serial_port.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY ?
-			"mmio" : "io";
+	if (table->serial_port.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+		switch (table->serial_port.access_width) {
+		default:
+			pr_err("Unexpected SPCR Access Width.  Defaulting to byte size\n");
+		case ACPI_ACCESS_SIZE_BYTE:
+			iotype = "mmio";
+			break;
+		case ACPI_ACCESS_SIZE_WORD:
+			iotype = "mmio16";
+			break;
+		case ACPI_ACCESS_SIZE_DWORD:
+			iotype = "mmio32";
+			break;
+		}
+	} else
+		iotype = "io";
 
 	switch (table->interface_type) {
 	case ACPI_DBG2_ARM_SBSA_32BIT:
@@ -115,6 +149,8 @@ int __init parse_spcr(bool earlycon)
 
 	if (qdf2400_erratum_44_present(&table->header))
 		uart = "qdf2400_e44";
+	if (xgene_8250_erratum_present(table))
+		iotype = "mmio32";
 
 	snprintf(opts, sizeof(opts), "%s,%s,0x%llx,%d", uart, iotype,
 		 table->serial_port.address, baud_rate);

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 73d39bc..d8dc830 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c

@@ -288,7 +288,7 @@ static void node_device_release(struct device *dev)
  *
  * Initialize and register the node device.
  */
-static int register_node(struct node *node, int num, struct node *parent)
+static int register_node(struct node *node, int num)
 {
 	int error;
 
@@ -567,19 +567,14 @@ static void init_node_hugetlb_work(int nid) { }
 
 int __register_one_node(int nid)
 {
-	int p_node = parent_node(nid);
-	struct node *parent = NULL;
 	int error;
 	int cpu;
 
-	if (p_node != nid)
-		parent = node_devices[p_node];
-
 	node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
 	if (!node_devices[nid])
 		return -ENOMEM;
 
-	error = register_node(node_devices[nid], nid, parent);
+	error = register_node(node_devices[nid], nid);
 
 	/* link cpu under this node */
 	for_each_present_cpu(cpu) {

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 9649dce..3b8210e 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c

@@ -1148,8 +1148,8 @@ static void genpd_syscore_switch(struct device *dev, bool suspend)
 {
 	struct generic_pm_domain *genpd;
 
-	genpd = genpd_lookup_dev(dev);
-	if (!genpd)
+	genpd = dev_to_genpd(dev);
+	if (!pm_genpd_present(genpd))
 		return;
 
 	if (suspend) {
@@ -1180,6 +1180,7 @@ EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweron);
 #define pm_genpd_resume_noirq		NULL
 #define pm_genpd_freeze_noirq		NULL
 #define pm_genpd_thaw_noirq		NULL
+#define pm_genpd_poweroff_noirq		NULL
 #define pm_genpd_restore_noirq		NULL
 #define pm_genpd_complete		NULL
 

diff --git a/drivers/base/property.c b/drivers/base/property.c
index 149de31..692007e 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c

@@ -187,6 +187,50 @@ struct fwnode_handle *dev_fwnode(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dev_fwnode);
 
+static bool pset_fwnode_property_present(struct fwnode_handle *fwnode,
+					 const char *propname)
+{
+	return !!pset_prop_get(to_pset_node(fwnode), propname); /* as bool */
+}
+
+static int pset_fwnode_read_int_array(struct fwnode_handle *fwnode,
+				      const char *propname,
+				      unsigned int elem_size, void *val,
+				      size_t nval)
+{
+	struct property_set *pset = to_pset_node(fwnode);
+
+	/* Without an output buffer the element count is returned instead. */
+	if (!val)
+		return pset_prop_count_elems_of_size(pset, propname, elem_size);
+
+	if (elem_size == sizeof(u8))
+		return pset_prop_read_u8_array(pset, propname, val, nval);
+	if (elem_size == sizeof(u16))
+		return pset_prop_read_u16_array(pset, propname, val, nval);
+	if (elem_size == sizeof(u32))
+		return pset_prop_read_u32_array(pset, propname, val, nval);
+	if (elem_size == sizeof(u64))
+		return pset_prop_read_u64_array(pset, propname, val, nval);
+
+	/* No reader exists for this element width. */
+	return -ENXIO;
+}
+
+static int pset_fwnode_property_read_string_array(struct fwnode_handle *fwnode,
+						  const char *propname,
+						  const char **val, size_t nval)
+{
+	return pset_prop_read_string_array(to_pset_node(fwnode), propname,
+					   val, nval); /* pset string op */
+}
+
+static const struct fwnode_operations pset_fwnode_ops = {
+	.property_present = pset_fwnode_property_present,
+	.property_read_int_array = pset_fwnode_read_int_array,
+	.property_read_string_array = pset_fwnode_property_read_string_array,
+};	/* set in device_add_properties() */
+
 /**
  * device_property_present - check if a property of a device is present
  * @dev: Device whose property is being checked
@@ -200,18 +244,6 @@ bool device_property_present(struct device *dev, const char *propname)
 }
 EXPORT_SYMBOL_GPL(device_property_present);
 
-static bool __fwnode_property_present(struct fwnode_handle *fwnode,
-				      const char *propname)
-{
-	if (is_of_node(fwnode))
-		return of_property_read_bool(to_of_node(fwnode), propname);
-	else if (is_acpi_node(fwnode))
-		return !acpi_node_prop_get(fwnode, propname, NULL);
-	else if (is_pset_node(fwnode))
-		return !!pset_prop_get(to_pset_node(fwnode), propname);
-	return false;
-}
-
 /**
  * fwnode_property_present - check if a property of a firmware node is present
  * @fwnode: Firmware node whose property to check
@@ -221,10 +253,11 @@ bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname)
 {
 	bool ret;
 
-	ret = __fwnode_property_present(fwnode, propname);
+	ret = fwnode_call_int_op(fwnode, property_present, propname) > 0;
 	if (ret == false && !IS_ERR_OR_NULL(fwnode) &&
 	    !IS_ERR_OR_NULL(fwnode->secondary))
-		ret = __fwnode_property_present(fwnode->secondary, propname);
+		ret = fwnode_call_int_op(fwnode->secondary, property_present,
+					 propname) > 0; /* -errno => absent */
 	return ret;
 }
 }
 EXPORT_SYMBOL_GPL(fwnode_property_present);
@@ -398,42 +431,23 @@ int device_property_match_string(struct device *dev, const char *propname,
 }
 EXPORT_SYMBOL_GPL(device_property_match_string);
 
-#define OF_DEV_PROP_READ_ARRAY(node, propname, type, val, nval)				\
-	(val) ? of_property_read_##type##_array((node), (propname), (val), (nval))	\
-	      : of_property_count_elems_of_size((node), (propname), sizeof(type))
+static int fwnode_property_read_int_array(struct fwnode_handle *fwnode,
+					  const char *propname,
+					  unsigned int elem_size, void *val,
+					  size_t nval)
+{
+	int ret;
 
-#define PSET_PROP_READ_ARRAY(node, propname, type, val, nval)				\
-	(val) ? pset_prop_read_##type##_array((node), (propname), (val), (nval))	\
-	      : pset_prop_count_elems_of_size((node), (propname), sizeof(type))
+	ret = fwnode_call_int_op(fwnode, property_read_int_array, propname,
+				 elem_size, val, nval);
+	if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) &&
+	    !IS_ERR_OR_NULL(fwnode->secondary))
+		ret = fwnode_call_int_op(
+			fwnode->secondary, property_read_int_array, propname,
+			elem_size, val, nval);
 
-#define FWNODE_PROP_READ(_fwnode_, _propname_, _type_, _proptype_, _val_, _nval_)	\
-({											\
-	int _ret_;									\
-	if (is_of_node(_fwnode_))							\
-		_ret_ = OF_DEV_PROP_READ_ARRAY(to_of_node(_fwnode_), _propname_,	\
-					       _type_, _val_, _nval_);			\
-	else if (is_acpi_node(_fwnode_))						\
-		_ret_ = acpi_node_prop_read(_fwnode_, _propname_, _proptype_,		\
-					    _val_, _nval_);				\
-	else if (is_pset_node(_fwnode_)) 						\
-		_ret_ = PSET_PROP_READ_ARRAY(to_pset_node(_fwnode_), _propname_,	\
-					     _type_, _val_, _nval_);			\
-	else										\
-		_ret_ = -ENXIO;								\
-	_ret_;										\
-})
-
-#define FWNODE_PROP_READ_ARRAY(_fwnode_, _propname_, _type_, _proptype_, _val_, _nval_)	\
-({											\
-	int _ret_;									\
-	_ret_ = FWNODE_PROP_READ(_fwnode_, _propname_, _type_, _proptype_,		\
-				 _val_, _nval_);					\
-	if (_ret_ == -EINVAL && !IS_ERR_OR_NULL(_fwnode_) &&				\
-	    !IS_ERR_OR_NULL(_fwnode_->secondary))					\
-		_ret_ = FWNODE_PROP_READ(_fwnode_->secondary, _propname_, _type_,	\
-				_proptype_, _val_, _nval_);				\
-	_ret_;										\
-})
+	return ret;
+}
 
 /**
  * fwnode_property_read_u8_array - return a u8 array property of firmware node
@@ -456,8 +470,8 @@ EXPORT_SYMBOL_GPL(device_property_match_string);
 int fwnode_property_read_u8_array(struct fwnode_handle *fwnode,
 				  const char *propname, u8 *val, size_t nval)
 {
-	return FWNODE_PROP_READ_ARRAY(fwnode, propname, u8, DEV_PROP_U8,
-				      val, nval);
+	return fwnode_property_read_int_array(fwnode, propname, sizeof(u8),
+					      val, nval);
 }
 EXPORT_SYMBOL_GPL(fwnode_property_read_u8_array);
 
@@ -482,8 +496,8 @@ EXPORT_SYMBOL_GPL(fwnode_property_read_u8_array);
 int fwnode_property_read_u16_array(struct fwnode_handle *fwnode,
 				   const char *propname, u16 *val, size_t nval)
 {
-	return FWNODE_PROP_READ_ARRAY(fwnode, propname, u16, DEV_PROP_U16,
-				      val, nval);
+	return fwnode_property_read_int_array(fwnode, propname, sizeof(u16),
+					      val, nval);
 }
 EXPORT_SYMBOL_GPL(fwnode_property_read_u16_array);
 
@@ -508,8 +522,8 @@ EXPORT_SYMBOL_GPL(fwnode_property_read_u16_array);
 int fwnode_property_read_u32_array(struct fwnode_handle *fwnode,
 				   const char *propname, u32 *val, size_t nval)
 {
-	return FWNODE_PROP_READ_ARRAY(fwnode, propname, u32, DEV_PROP_U32,
-				      val, nval);
+	return fwnode_property_read_int_array(fwnode, propname, sizeof(u32),
+					      val, nval);
 }
 EXPORT_SYMBOL_GPL(fwnode_property_read_u32_array);
 
@@ -534,29 +548,11 @@ EXPORT_SYMBOL_GPL(fwnode_property_read_u32_array);
 int fwnode_property_read_u64_array(struct fwnode_handle *fwnode,
 				   const char *propname, u64 *val, size_t nval)
 {
-	return FWNODE_PROP_READ_ARRAY(fwnode, propname, u64, DEV_PROP_U64,
-				      val, nval);
+	return fwnode_property_read_int_array(fwnode, propname, sizeof(u64),
+					      val, nval);
 }
 EXPORT_SYMBOL_GPL(fwnode_property_read_u64_array);
 
-static int __fwnode_property_read_string_array(struct fwnode_handle *fwnode,
-					       const char *propname,
-					       const char **val, size_t nval)
-{
-	if (is_of_node(fwnode))
-		return val ?
-			of_property_read_string_array(to_of_node(fwnode),
-						      propname, val, nval) :
-			of_property_count_strings(to_of_node(fwnode), propname);
-	else if (is_acpi_node(fwnode))
-		return acpi_node_prop_read(fwnode, propname, DEV_PROP_STRING,
-					   val, nval);
-	else if (is_pset_node(fwnode))
-		return pset_prop_read_string_array(to_pset_node(fwnode),
-						   propname, val, nval);
-	return -ENXIO;
-}
-
 /**
  * fwnode_property_read_string_array - return string array property of a node
  * @fwnode: Firmware node to get the property of
@@ -581,11 +577,13 @@ int fwnode_property_read_string_array(struct fwnode_handle *fwnode,
 {
 	int ret;
 
-	ret = __fwnode_property_read_string_array(fwnode, propname, val, nval);
+	ret = fwnode_call_int_op(fwnode, property_read_string_array, propname,
+				 val, nval);
 	if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) &&
 	    !IS_ERR_OR_NULL(fwnode->secondary))
-		ret = __fwnode_property_read_string_array(fwnode->secondary,
-							  propname, val, nval);
+		ret = fwnode_call_int_op(fwnode->secondary,
+					 property_read_string_array, propname,
+					 val, nval);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(fwnode_property_read_string_array);
@@ -903,6 +901,7 @@ int device_add_properties(struct device *dev,
 		return PTR_ERR(p);
 
 	p->fwnode.type = FWNODE_PDATA;
+	p->fwnode.ops = &pset_fwnode_ops;
 	set_secondary_fwnode(dev, &p->fwnode);
 	return 0;
 }
@@ -938,19 +937,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_next_parent);
  */
 struct fwnode_handle *fwnode_get_parent(struct fwnode_handle *fwnode)
 {
-	struct fwnode_handle *parent = NULL;
-
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
-
-		node = of_get_parent(to_of_node(fwnode));
-		if (node)
-			parent = &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		parent = acpi_node_get_parent(fwnode);
-	}
-
-	return parent;
+	return fwnode_call_ptr_op(fwnode, get_parent);
 }
 EXPORT_SYMBOL_GPL(fwnode_get_parent);
 
@@ -962,18 +949,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_parent);
 struct fwnode_handle *fwnode_get_next_child_node(struct fwnode_handle *fwnode,
 						 struct fwnode_handle *child)
 {
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
-
-		node = of_get_next_available_child(to_of_node(fwnode),
-						   to_of_node(child));
-		if (node)
-			return &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		return acpi_get_next_subnode(fwnode, child);
-	}
-
-	return NULL;
+	return fwnode_call_ptr_op(fwnode, get_next_child_node, child);
 }
 EXPORT_SYMBOL_GPL(fwnode_get_next_child_node);
 
@@ -1005,23 +981,7 @@ EXPORT_SYMBOL_GPL(device_get_next_child_node);
 struct fwnode_handle *fwnode_get_named_child_node(struct fwnode_handle *fwnode,
 						  const char *childname)
 {
-	struct fwnode_handle *child;
-
-	/*
-	 * Find first matching named child node of this fwnode.
-	 * For ACPI this will be a data only sub-node.
-	 */
-	fwnode_for_each_child_node(fwnode, child) {
-		if (is_of_node(child)) {
-			if (!of_node_cmp(to_of_node(child)->name, childname))
-				return child;
-		} else if (is_acpi_data_node(child)) {
-			if (acpi_data_node_match(child, childname))
-				return child;
-		}
-	}
-
-	return NULL;
+	return fwnode_call_ptr_op(fwnode, get_named_child_node, childname);
 }
 EXPORT_SYMBOL_GPL(fwnode_get_named_child_node);
 
@@ -1043,8 +1003,7 @@ EXPORT_SYMBOL_GPL(device_get_named_child_node);
  */
 void fwnode_handle_get(struct fwnode_handle *fwnode)
 {
-	if (is_of_node(fwnode))
-		of_node_get(to_of_node(fwnode));
+	fwnode_call_void_op(fwnode, get);
 }
 EXPORT_SYMBOL_GPL(fwnode_handle_get);
 
@@ -1058,12 +1017,21 @@ EXPORT_SYMBOL_GPL(fwnode_handle_get);
  */
 void fwnode_handle_put(struct fwnode_handle *fwnode)
 {
-	if (is_of_node(fwnode))
-		of_node_put(to_of_node(fwnode));
+	fwnode_call_void_op(fwnode, put);
 }
 EXPORT_SYMBOL_GPL(fwnode_handle_put);
 
 /**
+ * fwnode_device_is_available - check if a device is available for use
+ * @fwnode: Pointer to the fwnode of the device.
+ */
+bool fwnode_device_is_available(struct fwnode_handle *fwnode)
+{
+	/*
+	 * Dispatch to the firmware-specific device_is_available operation;
+	 * the integer produced by fwnode_call_int_op() is converted to bool
+	 * by the return.
+	 * NOTE(review): if fwnode_call_int_op() yields a negative errno when
+	 * the operation is not implemented, that value converts to true here
+	 * -- confirm this is the intended default.
+	 */
+	return fwnode_call_int_op(fwnode, device_is_available);
+}
+EXPORT_SYMBOL_GPL(fwnode_device_is_available);
+
+/**
  * device_get_child_node_count - return the number of child nodes for device
  * @dev: Device to cound the child nodes for
  */
@@ -1198,28 +1166,31 @@ struct fwnode_handle *
 fwnode_graph_get_next_endpoint(struct fwnode_handle *fwnode,
 			       struct fwnode_handle *prev)
 {
-	struct fwnode_handle *endpoint = NULL;
-
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
-
-		node = of_graph_get_next_endpoint(to_of_node(fwnode),
-						  to_of_node(prev));
-
-		if (node)
-			endpoint = &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		endpoint = acpi_graph_get_next_endpoint(fwnode, prev);
-		if (IS_ERR(endpoint))
-			endpoint = NULL;
-	}
-
-	return endpoint;
-
+	return fwnode_call_ptr_op(fwnode, graph_get_next_endpoint, prev);
 }
 EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint);
 
 /**
+ * fwnode_graph_get_port_parent - Return the device fwnode of a port endpoint
+ * @endpoint: Endpoint firmware node of the port
+ *
+ * Return: the firmware node of the device the @endpoint belongs to.
+ */
+struct fwnode_handle *
+fwnode_graph_get_port_parent(struct fwnode_handle *endpoint)
+{
+	/* Step up from the endpoint to the port node that contains it. */
+	struct fwnode_handle *port = fwnode_get_parent(endpoint);
+	struct fwnode_handle *dev_fwnode;
+
+	/* Let the firmware backend resolve the device owning that port. */
+	dev_fwnode = fwnode_call_ptr_op(port, graph_get_port_parent);
+
+	/* The port was only an intermediate step; release it. */
+	fwnode_handle_put(port);
+	return dev_fwnode;
+}
+EXPORT_SYMBOL_GPL(fwnode_graph_get_port_parent);
+
+/**
  * fwnode_graph_get_remote_port_parent - Return fwnode of a remote device
  * @fwnode: Endpoint firmware node pointing to the remote endpoint
  *
@@ -1228,22 +1199,12 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint);
 struct fwnode_handle *
 fwnode_graph_get_remote_port_parent(struct fwnode_handle *fwnode)
 {
-	struct fwnode_handle *parent = NULL;
+	struct fwnode_handle *endpoint, *parent;
 
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
+	endpoint = fwnode_graph_get_remote_endpoint(fwnode);
+	parent = fwnode_graph_get_port_parent(endpoint);
 
-		node = of_graph_get_remote_port_parent(to_of_node(fwnode));
-		if (node)
-			parent = &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		int ret;
-
-		ret = acpi_graph_get_remote_endpoint(fwnode, &parent, NULL,
-						     NULL);
-		if (ret)
-			return NULL;
-	}
+	fwnode_handle_put(endpoint);
 
 	return parent;
 }
@@ -1257,23 +1218,7 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port_parent);
  */
 struct fwnode_handle *fwnode_graph_get_remote_port(struct fwnode_handle *fwnode)
 {
-	struct fwnode_handle *port = NULL;
-
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
-
-		node = of_graph_get_remote_port(to_of_node(fwnode));
-		if (node)
-			port = &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		int ret;
-
-		ret = acpi_graph_get_remote_endpoint(fwnode, NULL, &port, NULL);
-		if (ret)
-			return NULL;
-	}
-
-	return port;
+	return fwnode_get_next_parent(fwnode_graph_get_remote_endpoint(fwnode));
 }
 EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port);
 
@@ -1286,29 +1231,48 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port);
 struct fwnode_handle *
 fwnode_graph_get_remote_endpoint(struct fwnode_handle *fwnode)
 {
-	struct fwnode_handle *endpoint = NULL;
-
-	if (is_of_node(fwnode)) {
-		struct device_node *node;
-
-		node = of_parse_phandle(to_of_node(fwnode), "remote-endpoint",
-					0);
-		if (node)
-			endpoint = &node->fwnode;
-	} else if (is_acpi_node(fwnode)) {
-		int ret;
-
-		ret = acpi_graph_get_remote_endpoint(fwnode, NULL, NULL,
-						     &endpoint);
-		if (ret)
-			return NULL;
-	}
-
-	return endpoint;
+	return fwnode_call_ptr_op(fwnode, graph_get_remote_endpoint);
 }
 EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_endpoint);
 
 /**
+ * fwnode_graph_get_remote_node - get remote parent node for given port/endpoint
+ * @fwnode: pointer to parent fwnode_handle containing graph port/endpoint
+ * @port_id: identifier of the parent port node
+ * @endpoint_id: identifier of the endpoint node
+ *
+ * Return: Remote fwnode handle associated with remote endpoint node linked
+ *	   to @fwnode. Use fwnode_handle_put() on it when done.
+ */
+struct fwnode_handle *fwnode_graph_get_remote_node(struct fwnode_handle *fwnode,
+						   u32 port_id, u32 endpoint_id)
+{
+	struct fwnode_handle *endpoint = NULL;
+
+	/*
+	 * Walk every endpoint below @fwnode looking for the one whose parsed
+	 * port/endpoint ids match the request.
+	 *
+	 * NOTE(review): this assumes fwnode_graph_get_next_endpoint() returns
+	 * a counted reference and releases the previous endpoint passed in,
+	 * as the OF and ACPI backends do.  While the loop keeps iterating the
+	 * iterator balances the count itself; once we leave the loop early we
+	 * have to drop the current endpoint ourselves.
+	 */
+	while ((endpoint = fwnode_graph_get_next_endpoint(fwnode, endpoint))) {
+		struct fwnode_endpoint fwnode_ep;
+		struct fwnode_handle *remote;
+		int ret;
+
+		ret = fwnode_graph_parse_endpoint(endpoint, &fwnode_ep);
+		if (ret < 0)
+			continue;
+
+		if (fwnode_ep.port != port_id || fwnode_ep.id != endpoint_id)
+			continue;
+
+		remote = fwnode_graph_get_remote_port_parent(endpoint);
+
+		/* Done with the local endpoint: do not leak its reference. */
+		fwnode_handle_put(endpoint);
+
+		if (!remote)
+			return NULL;
+
+		/* An unavailable remote is not handed out, so release it. */
+		if (!fwnode_device_is_available(remote)) {
+			fwnode_handle_put(remote);
+			return NULL;
+		}
+
+		/* The caller owns this reference (fwnode_handle_put()). */
+		return remote;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_node);
+
+/**
  * fwnode_graph_parse_endpoint - parse common endpoint node properties
  * @fwnode: pointer to endpoint fwnode_handle
  * @endpoint: pointer to the fwnode endpoint data structure
@@ -1320,22 +1284,8 @@ EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_endpoint);
 int fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
 				struct fwnode_endpoint *endpoint)
 {
-	struct fwnode_handle *port_fwnode = fwnode_get_parent(fwnode);
-
 	memset(endpoint, 0, sizeof(*endpoint));
 
-	endpoint->local_fwnode = fwnode;
-
-	if (is_acpi_node(port_fwnode)) {
-		fwnode_property_read_u32(port_fwnode, "port", &endpoint->port);
-		fwnode_property_read_u32(fwnode, "endpoint", &endpoint->id);
-	} else {
-		fwnode_property_read_u32(port_fwnode, "reg", &endpoint->port);
-		fwnode_property_read_u32(fwnode, "reg", &endpoint->id);
-	}
-
-	fwnode_handle_put(port_fwnode);
-
-	return 0;
+	return fwnode_call_int_op(fwnode, graph_parse_endpoint, endpoint);
 }
 EXPORT_SYMBOL(fwnode_graph_parse_endpoint);

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 17723fd5..104b71c 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c

@@ -9,6 +9,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/initrd.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/major.h>

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 02a6119..678af94 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c

@@ -1944,6 +1944,13 @@ static void cciss_get_serial_no(ctlr_info_t *h, int logvol,
 	return;
 }
 
+static void cciss_initialize_rq(struct request *rq)
+{
+	/* Initialise the scsi_request stored in the request's PDU area. */
+	scsi_req_init(blk_mq_rq_to_pdu(rq));
+}
+
 /*
  * cciss_add_disk sets up the block device queue for a logical drive
  */
@@ -1956,6 +1963,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
 
 	disk->queue->cmd_size = sizeof(struct scsi_request);
 	disk->queue->request_fn = do_cciss_request;
+	disk->queue->initialize_rq_fn = cciss_initialize_rq;
 	disk->queue->queue_lock = &h->lock;
 	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, disk->queue);
 	if (blk_init_allocated_queue(disk->queue) < 0)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 61b046f..4a3cfc7 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c

@@ -174,7 +174,6 @@ static void mtip_init_cmd_header(struct request *rq)
 {
 	struct driver_data *dd = rq->q->queuedata;
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
-	u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
 
 	/* Point the command headers at the command tables. */
 	cmd->command_header = dd->port->command_list +
@@ -182,7 +181,7 @@ static void mtip_init_cmd_header(struct request *rq)
 	cmd->command_header_dma = dd->port->command_list_dma +
 				(sizeof(struct mtip_cmd_hdr) * rq->tag);
 
-	if (host_cap_64)
+	if (test_bit(MTIP_PF_HOST_CAP_64, &dd->port->flags))
 		cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16);
 
 	cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF);
@@ -386,6 +385,7 @@ static void mtip_init_port(struct mtip_port *port)
 			 port->mmio + PORT_LST_ADDR_HI);
 		writel((port->rxfis_dma >> 16) >> 16,
 			 port->mmio + PORT_FIS_ADDR_HI);
+		set_bit(MTIP_PF_HOST_CAP_64, &port->flags);
 	}
 
 	writel(port->command_list_dma & 0xFFFFFFFF,
@@ -950,7 +950,7 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
 	unsigned long to;
 	bool active = true;
 
-	blk_mq_stop_hw_queues(port->dd->queue);
+	blk_mq_quiesce_queue(port->dd->queue);
 
 	to = jiffies + msecs_to_jiffies(timeout);
 	do {
@@ -970,10 +970,10 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
 			break;
 	} while (time_before(jiffies, to));
 
-	blk_mq_start_stopped_hw_queues(port->dd->queue, true);
+	blk_mq_unquiesce_queue(port->dd->queue);
 	return active ? -EBUSY : 0;
 err_fault:
-	blk_mq_start_stopped_hw_queues(port->dd->queue, true);
+	blk_mq_unquiesce_queue(port->dd->queue);
 	return -EFAULT;
 }
 
@@ -2737,6 +2737,9 @@ static void mtip_abort_cmd(struct request *req, void *data,
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
 	struct driver_data *dd = data;
 
+	if (!blk_mq_request_started(req))
+		return;
+
 	dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
 
 	clear_bit(req->tag, dd->port->cmds_to_issue);
@@ -2749,6 +2752,9 @@ static void mtip_queue_cmd(struct request *req, void *data,
 {
 	struct driver_data *dd = data;
 
+	if (!blk_mq_request_started(req))
+		return;
+
 	set_bit(req->tag, dd->port->cmds_to_issue);
 	blk_abort_request(req);
 }
@@ -2814,6 +2820,8 @@ static int mtip_service_thread(void *data)
 				dev_warn(&dd->pdev->dev,
 					"Completion workers still active!");
 
+			blk_mq_quiesce_queue(dd->queue);
+
 			spin_lock(dd->queue->queue_lock);
 			blk_mq_tagset_busy_iter(&dd->tags,
 							mtip_queue_cmd, dd);
@@ -2826,6 +2834,8 @@ static int mtip_service_thread(void *data)
 							mtip_abort_cmd, dd);
 
 			clear_bit(MTIP_PF_TO_ACTIVE_BIT, &dd->port->flags);
+
+			blk_mq_unquiesce_queue(dd->queue);
 		}
 
 		if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
@@ -3995,8 +4005,9 @@ static int mtip_block_remove(struct driver_data *dd)
 						dd->disk->disk_name);
 
 	blk_freeze_queue_start(dd->queue);
-	blk_mq_stop_hw_queues(dd->queue);
+	blk_mq_quiesce_queue(dd->queue);
 	blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd);
+	blk_mq_unquiesce_queue(dd->queue);
 
 	/*
 	 * Delete our gendisk structure. This also removes the device

diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index e8286af..e20e55d 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h

@@ -140,6 +140,7 @@ enum {
 				(1 << MTIP_PF_SE_ACTIVE_BIT) |
 				(1 << MTIP_PF_DM_ACTIVE_BIT) |
 				(1 << MTIP_PF_TO_ACTIVE_BIT)),
+	MTIP_PF_HOST_CAP_64         = 10, /* cache HOST_CAP_64 */
 
 	MTIP_PF_SVC_THD_ACTIVE_BIT  = 4,
 	MTIP_PF_ISSUE_CMDS_BIT      = 5,

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 977ec96..dea7d85 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c

@@ -661,9 +661,9 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
 
 static void nbd_clear_que(struct nbd_device *nbd)
 {
-	blk_mq_stop_hw_queues(nbd->disk->queue);
+	blk_mq_quiesce_queue(nbd->disk->queue);
 	blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
-	blk_mq_start_hw_queues(nbd->disk->queue);
+	blk_mq_unquiesce_queue(nbd->disk->queue);
 	dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
 }
 

diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 71f4422..85c24ca 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c

@@ -844,9 +844,6 @@ static int __init null_init(void)
 		queue_mode = NULL_Q_MQ;
 	}
 
-	if (queue_mode == NULL_Q_MQ && shared_tags)
-		null_init_tag_set(&tag_set);
-
 	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
 		if (submit_queues < nr_online_nodes) {
 			pr_warn("null_blk: submit_queues param is set to %u.",
@@ -858,11 +855,19 @@ static int __init null_init(void)
 	else if (!submit_queues)
 		submit_queues = 1;
 
+	if (queue_mode == NULL_Q_MQ && shared_tags) {
+		ret = null_init_tag_set(&tag_set);
+		if (ret)
+			return ret;
+	}
+
 	mutex_init(&lock);
 
 	null_major = register_blkdev(0, "nullb");
-	if (null_major < 0)
-		return null_major;
+	if (null_major < 0) {
+		ret = null_major;
+		goto err_tagset;
+	}
 
 	if (use_lightnvm) {
 		ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64),
@@ -891,6 +896,9 @@ static int __init null_init(void)
 	kmem_cache_destroy(ppa_cache);
 err_ppa:
 	unregister_blkdev(null_major, "nullb");
+err_tagset:
+	if (queue_mode == NULL_Q_MQ && shared_tags)
+		blk_mq_free_tag_set(&tag_set);
 	return ret;
 }
 

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 0297ad7..4e02aa5 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c

@@ -840,7 +840,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
 	/* Make sure no work handler is accessing the device. */
 	flush_work(&vblk->config_work);
 
-	blk_mq_stop_hw_queues(vblk->disk->queue);
+	blk_mq_quiesce_queue(vblk->disk->queue);
 
 	vdev->config->del_vqs(vdev);
 	return 0;
@@ -857,7 +857,7 @@ static int virtblk_restore(struct virtio_device *vdev)
 
 	virtio_device_ready(vdev);
 
-	blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
+	blk_mq_unquiesce_queue(vblk->disk->queue);
 	return 0;
 }
 #endif

diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 12046f4..5b8992b 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c

@@ -68,13 +68,11 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp)
 
 bool zcomp_available_algorithm(const char *comp)
 {
-	int i = 0;
+	int i;
 
-	while (backends[i]) {
-		if (sysfs_streq(comp, backends[i]))
-			return true;
-		i++;
-	}
+	i = __sysfs_match_string(backends, -1, comp);
+	if (i >= 0)
+		return true;
 
 	/*
 	 * Crypto does not ignore a trailing new line symbol,

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index d3e3af2..856d5dc02 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c

@@ -1124,7 +1124,7 @@ static struct attribute *zram_disk_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group zram_disk_attr_group = {
+static const struct attribute_group zram_disk_attr_group = {
 	.attrs = zram_disk_attrs,
 };
 

diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig
index 90f3edf..f6fa056 100644
--- a/drivers/char/ipmi/Kconfig
+++ b/drivers/char/ipmi/Kconfig

@@ -5,6 +5,7 @@
 menuconfig IPMI_HANDLER
        tristate 'IPMI top-level message handler'
        depends on HAS_IOMEM
+       select IPMI_DMI_DECODE if DMI
        help
          This enables the central IPMI message handler, required for IPMI
 	 to work.
@@ -16,6 +17,9 @@
 
 	 If unsure, say N.
 
+config IPMI_DMI_DECODE
+       bool
+
 if IPMI_HANDLER
 
 config IPMI_PANIC_EVENT

diff --git a/drivers/char/ipmi/Makefile b/drivers/char/ipmi/Makefile
index 0d98cd9..eefb0b3 100644
--- a/drivers/char/ipmi/Makefile
+++ b/drivers/char/ipmi/Makefile

@@ -7,6 +7,7 @@
 obj-$(CONFIG_IPMI_HANDLER) += ipmi_msghandler.o
 obj-$(CONFIG_IPMI_DEVICE_INTERFACE) += ipmi_devintf.o
 obj-$(CONFIG_IPMI_SI) += ipmi_si.o
+obj-$(CONFIG_IPMI_DMI_DECODE) += ipmi_dmi.o
 obj-$(CONFIG_IPMI_SSIF) += ipmi_ssif.o
 obj-$(CONFIG_IPMI_POWERNV) += ipmi_powernv.o
 obj-$(CONFIG_IPMI_WATCHDOG) += ipmi_watchdog.o

diff --git a/drivers/char/ipmi/ipmi_dmi.c b/drivers/char/ipmi/ipmi_dmi.c
new file mode 100644
index 0000000..2a84401
--- /dev/null
+++ b/drivers/char/ipmi/ipmi_dmi.c

@@ -0,0 +1,273 @@
+/*
+ * A hack to create a platform device from a DMI entry.  This will
+ * allow autoloading of the IPMI driver based on SMBIOS entries.
+ */
+
+#include <linux/ipmi.h>
+#include <linux/init.h>
+#include <linux/dmi.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include "ipmi_dmi.h"
+
+/*
+ * One IPMI interface found in the DMI table.  Entries are pushed onto the
+ * singly linked ipmi_dmi_infos list by dmi_add_platform_ipmi() and searched
+ * by ipmi_dmi_get_slave_addr().
+ */
+struct ipmi_dmi_info {
+	int type;			/* interface type as passed in (IPMI_DMI_TYPE_*) */
+	u32 flags;			/* resource flags the entry was decoded with */
+	unsigned long addr;		/* base address of the interface */
+	u8 slave_addr;			/* slave address taken from the DMI entry */
+	struct ipmi_dmi_info *next;	/* next entry in the list, NULL at the end */
+};
+
+static struct ipmi_dmi_info *ipmi_dmi_infos;
+
+static int ipmi_dmi_nr __initdata;
+
+/*
+ * Record one decoded DMI IPMI entry and register a platform device for it.
+ *
+ * @base_addr:  base address of the interface (I2C address for SSIF)
+ * @flags:      resource flags used for the address resources
+ * @slave_addr: slave address, exported as the "slave-addr" property
+ * @irq:        interrupt number, 0 if none
+ * @offset:     distance between consecutive interface registers
+ * @type:       IPMI_DMI_TYPE_* interface type
+ *
+ * The entry is first remembered on the ipmi_dmi_infos list so that
+ * ipmi_dmi_get_slave_addr() can find it later; failing to allocate that
+ * record only produces a warning.  Then a platform device named after the
+ * interface type is created.  Non-SSIF devices get one address resource per
+ * register (two for KCS/SMIC, three for BT) plus an optional IRQ resource;
+ * SSIF devices carry properties only.  ipmi_dmi_nr is advanced only when the
+ * device was registered successfully.
+ */
+static void __init dmi_add_platform_ipmi(unsigned long base_addr,
+					 u32 flags,
+					 u8 slave_addr,
+					 int irq,
+					 int offset,
+					 int type)
+{
+	struct platform_device *pdev;
+	struct resource r[4];
+	unsigned int num_r = 1, size;
+	struct property_entry p[4] = {
+		PROPERTY_ENTRY_U8("slave-addr", slave_addr),
+		PROPERTY_ENTRY_U8("ipmi-type", type),
+		PROPERTY_ENTRY_U16("i2c-addr", base_addr),
+		{ }
+	};
+	const char *name, *override;
+	int rv;
+	struct ipmi_dmi_info *info;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		pr_warn("ipmi:dmi: Could not allocate dmi info\n");
+	} else {
+		info->type = type;
+		info->flags = flags;
+		info->addr = base_addr;
+		info->slave_addr = slave_addr;
+		info->next = ipmi_dmi_infos;
+		ipmi_dmi_infos = info;
+	}
+
+	name = "dmi-ipmi-si";
+	override = "ipmi_si";
+	switch (type) {
+	case IPMI_DMI_TYPE_SSIF:
+		name = "dmi-ipmi-ssif";
+		override = "ipmi_ssif";
+		offset = 1;
+		size = 1;
+		break;
+	case IPMI_DMI_TYPE_BT:
+		size = 3;
+		break;
+	case IPMI_DMI_TYPE_KCS:
+	case IPMI_DMI_TYPE_SMIC:
+		size = 2;
+		break;
+	default:
+		pr_err("ipmi:dmi: Invalid IPMI type: %d\n", type);
+		return;
+	}
+
+	pdev = platform_device_alloc(name, ipmi_dmi_nr);
+	if (!pdev) {
+		pr_err("ipmi:dmi: Error allocation IPMI platform device\n");
+		return;
+	}
+
+	/*
+	 * The platform core kfree()s driver_override when the device is
+	 * released, so it must own a heap copy and never point at a string
+	 * literal.
+	 */
+	pdev->driver_override = kstrdup(override, GFP_KERNEL);
+	if (!pdev->driver_override)
+		goto err;
+
+	/* SSIF is described by properties only; it has no I/O resources. */
+	if (type == IPMI_DMI_TYPE_SSIF)
+		goto add_properties;
+
+	memset(r, 0, sizeof(r));
+
+	r[0].start = base_addr;
+	r[0].end = r[0].start + offset - 1;
+	r[0].name = "IPMI Address 1";
+	r[0].flags = flags;
+
+	if (size > 1) {
+		r[1].start = r[0].start + offset;
+		r[1].end = r[1].start + offset - 1;
+		r[1].name = "IPMI Address 2";
+		r[1].flags = flags;
+		num_r++;
+	}
+
+	if (size > 2) {
+		r[2].start = r[1].start + offset;
+		r[2].end = r[2].start + offset - 1;
+		r[2].name = "IPMI Address 3";
+		r[2].flags = flags;
+		num_r++;
+	}
+
+	/* At most three address resources precede the IRQ, so r[4] suffices. */
+	if (irq) {
+		r[num_r].start = irq;
+		r[num_r].end = irq;
+		r[num_r].name = "IPMI IRQ";
+		r[num_r].flags = IORESOURCE_IRQ;
+		num_r++;
+	}
+
+	rv = platform_device_add_resources(pdev, r, num_r);
+	if (rv) {
+		dev_err(&pdev->dev,
+			"ipmi:dmi: Unable to add resources: %d\n", rv);
+		goto err;
+	}
+
+add_properties:
+	rv = platform_device_add_properties(pdev, p);
+	if (rv) {
+		dev_err(&pdev->dev,
+			"ipmi:dmi: Unable to add properties: %d\n", rv);
+		goto err;
+	}
+
+	rv = platform_device_add(pdev);
+	if (rv) {
+		dev_err(&pdev->dev, "ipmi:dmi: Unable to add device: %d\n", rv);
+		goto err;
+	}
+
+	ipmi_dmi_nr++;
+	return;
+
+err:
+	/* Drops the last reference; the release path frees driver_override. */
+	platform_device_put(pdev);
+}
+
+/*
+ * Look up the slave address for a given interface.  This is here
+ * because ACPI doesn't have a slave address while SMBIOS does, but we
+ * prefer using ACPI so the ACPI code can use the IPMI namespace.
+ * This function allows an ACPI-specified IPMI device to look up the
+ * slave address from the DMI table.
+ */
+int ipmi_dmi_get_slave_addr(int type, u32 flags, unsigned long base_addr)
+{
+	struct ipmi_dmi_info *entry;
+
+	/* Scan the recorded DMI entries; the first exact match wins. */
+	for (entry = ipmi_dmi_infos; entry; entry = entry->next) {
+		if (entry->type != type)
+			continue;
+		if (entry->flags != flags)
+			continue;
+		if (entry->addr != base_addr)
+			continue;
+
+		return entry->slave_addr;
+	}
+
+	/* Nothing recorded for this interface. */
+	return 0;
+}
+EXPORT_SYMBOL(ipmi_dmi_get_slave_addr);
+
+#define DMI_IPMI_MIN_LENGTH	0x10
+#define DMI_IPMI_VER2_LENGTH	0x12
+#define DMI_IPMI_TYPE		4
+#define DMI_IPMI_SLAVEADDR	6
+#define DMI_IPMI_ADDR		8
+#define DMI_IPMI_ACCESS		0x10
+#define DMI_IPMI_IRQ		0x11
+#define DMI_IPMI_IO_MASK	0xfffe
+
+/*
+ * Decode one SMBIOS IPMI device record and pass the result on to
+ * dmi_add_platform_ipmi().
+ *
+ * Records shorter than DMI_IPMI_MIN_LENGTH are ignored.  Records of at
+ * least DMI_IPMI_VER2_LENGTH bytes also carry the access byte and the IRQ
+ * byte; shorter records are treated as byte-spaced I/O port interfaces.
+ */
+static void __init dmi_decode_ipmi(const struct dmi_header *dm)
+{
+	const u8	*data = (const u8 *) dm;
+	u32             flags = IORESOURCE_IO;
+	unsigned long	base_addr;
+	u8              len = dm->length;
+	u8              slave_addr;
+	int             irq = 0, offset;
+	int             type;
+
+	if (len < DMI_IPMI_MIN_LENGTH)
+		return;
+
+	type = data[DMI_IPMI_TYPE];
+	slave_addr = data[DMI_IPMI_SLAVEADDR];
+
+	/* Copy sizeof(unsigned long) raw bytes starting at DMI_IPMI_ADDR. */
+	memcpy(&base_addr, data + DMI_IPMI_ADDR, sizeof(unsigned long));
+	if (len >= DMI_IPMI_VER2_LENGTH) {
+		if (type == IPMI_DMI_TYPE_SSIF) {
+			offset = 0;
+			flags = 0;
+			/* The I2C address is the address byte shifted right by one. */
+			base_addr = data[DMI_IPMI_ADDR] >> 1;
+			if (base_addr == 0) {
+				/*
+				 * Some broken systems put the I2C address in
+				 * the slave address field.  We try to
+				 * accommodate them here.
+				 */
+				base_addr = data[DMI_IPMI_SLAVEADDR] >> 1;
+				slave_addr = 0;
+			}
+		} else {
+			/* Bit 0 of the address selects I/O (1) or memory (0). */
+			if (base_addr & 1) {
+				/* I/O */
+				base_addr &= DMI_IPMI_IO_MASK;
+			} else {
+				/* Memory */
+				flags = IORESOURCE_MEM;
+			}
+
+			/*
+			 * If bit 4 of byte 0x10 is set, then the lsb
+			 * for the address is odd.
+			 */
+			base_addr |= (data[DMI_IPMI_ACCESS] >> 4) & 1;
+
+			irq = data[DMI_IPMI_IRQ];
+
+			/*
+			 * The top two bits of byte 0x10 hold the
+			 * register spacing.
+			 */
+			switch ((data[DMI_IPMI_ACCESS] >> 6) & 3) {
+			case 0: /* Byte boundaries */
+				offset = 1;
+				break;
+			case 1: /* 32-bit boundaries */
+				offset = 4;
+				break;
+			case 2: /* 16-byte boundaries */
+				offset = 16;
+				break;
+			default:
+				/* Spacing value 3 is not handled; drop the record. */
+				pr_err("ipmi:dmi: Invalid offset: 0");
+				return;
+			}
+		}
+	} else {
+		/* Old DMI spec. */
+		/*
+		 * Note that technically, the lower bit of the base
+		 * address should be 1 if the address is I/O and 0 if
+		 * the address is in memory.  So many systems get that
+		 * wrong (and all that I have seen are I/O) so we just
+		 * ignore that bit and assume I/O.  Systems that use
+		 * memory should use the newer spec, anyway.
+		 */
+		base_addr = base_addr & DMI_IPMI_IO_MASK;
+		offset = 1;
+	}
+
+	dmi_add_platform_ipmi(base_addr, flags, slave_addr, irq,
+			      offset, type);
+}
+
+static int __init scan_for_dmi_ipmi(void)
+{
+	const struct dmi_device *entry;
+
+	/* Visit every DMI device of type IPMI and decode each one in turn. */
+	for (entry = dmi_find_device(DMI_DEV_TYPE_IPMI, NULL, NULL);
+	     entry;
+	     entry = dmi_find_device(DMI_DEV_TYPE_IPMI, NULL, entry))
+		dmi_decode_ipmi((const struct dmi_header *) entry->device_data);
+
+	return 0;
+}
+subsys_initcall(scan_for_dmi_ipmi);

diff --git a/drivers/char/ipmi/ipmi_dmi.h b/drivers/char/ipmi/ipmi_dmi.h
new file mode 100644
index 0000000..0a1afe5
--- /dev/null
+++ b/drivers/char/ipmi/ipmi_dmi.h

@@ -0,0 +1,12 @@
+/*
+ * DMI defines for use by IPMI
+ */
+
+#define IPMI_DMI_TYPE_KCS	0x01
+#define IPMI_DMI_TYPE_SMIC	0x02
+#define IPMI_DMI_TYPE_BT	0x03
+#define IPMI_DMI_TYPE_SSIF	0x04
+
+#ifdef CONFIG_IPMI_DMI_DECODE
+int ipmi_dmi_get_slave_addr(int type, u32 flags, unsigned long base_addr);
+#endif

diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 9f69995..810b138 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c

@@ -2397,7 +2397,7 @@ static umode_t bmc_dev_attr_is_visible(struct kobject *kobj,
 	return mode;
 }
 
-static struct attribute_group bmc_dev_attr_group = {
+static const struct attribute_group bmc_dev_attr_group = {
 	.attrs		= bmc_dev_attrs,
 	.is_visible	= bmc_dev_attr_is_visible,
 };
@@ -2407,7 +2407,7 @@ static const struct attribute_group *bmc_dev_attr_groups[] = {
 	NULL
 };
 
-static struct device_type bmc_device_type = {
+static const struct device_type bmc_device_type = {
 	.groups		= bmc_dev_attr_groups,
 };
 
@@ -3878,6 +3878,9 @@ static void smi_recv_tasklet(unsigned long val)
 	 * because the lower layer is allowed to hold locks while calling
 	 * message delivery.
 	 */
+
+	rcu_read_lock();
+
 	if (!run_to_completion)
 		spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
 	if (intf->curr_msg == NULL && !intf->in_shutdown) {
@@ -3900,6 +3903,8 @@ static void smi_recv_tasklet(unsigned long val)
 	if (newmsg)
 		intf->handlers->sender(intf->send_info, newmsg);
 
+	rcu_read_unlock();
+
 	handle_new_recv_msgs(intf);
 }
 

diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 59ee93e..9859738 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c

@@ -61,6 +61,7 @@
 #include <linux/ipmi_smi.h>
 #include <asm/io.h>
 #include "ipmi_si_sm.h"
+#include "ipmi_dmi.h"
 #include <linux/dmi.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
@@ -1942,7 +1943,7 @@ static int hotmod_handler(const char *val, struct kernel_param *kp)
 				info->io.regspacing = DEFAULT_REGSPACING;
 			info->io.regsize = regsize;
 			if (!info->io.regsize)
-				info->io.regsize = DEFAULT_REGSPACING;
+				info->io.regsize = DEFAULT_REGSIZE;
 			info->io.regshift = regshift;
 			info->irq = irq;
 			if (info->irq)
@@ -2036,7 +2037,7 @@ static int hardcode_find_bmc(void)
 			info->io.regspacing = DEFAULT_REGSPACING;
 		info->io.regsize = regsizes[i];
 		if (!info->io.regsize)
-			info->io.regsize = DEFAULT_REGSPACING;
+			info->io.regsize = DEFAULT_REGSIZE;
 		info->io.regshift = regshifts[i];
 		info->irq = irqs[i];
 		if (info->irq)
@@ -2273,136 +2274,105 @@ static void spmi_find_bmc(void)
 }
 #endif
 
-#ifdef CONFIG_DMI
-struct dmi_ipmi_data {
-	u8   		type;
-	u8   		addr_space;
-	unsigned long	base_addr;
-	u8   		irq;
-	u8              offset;
-	u8              slave_addr;
-};
-
-static int decode_dmi(const struct dmi_header *dm,
-				struct dmi_ipmi_data *dmi)
+#if defined(CONFIG_DMI) || defined(CONFIG_ACPI)
+struct resource *ipmi_get_info_from_resources(struct platform_device *pdev,
+					      struct smi_info *info)
 {
-	const u8	*data = (const u8 *)dm;
-	unsigned long  	base_addr;
-	u8		reg_spacing;
-	u8              len = dm->length;
+	struct resource *res, *res_second;
 
-	dmi->type = data[4];
-
-	memcpy(&base_addr, data+8, sizeof(unsigned long));
-	if (len >= 0x11) {
-		if (base_addr & 1) {
-			/* I/O */
-			base_addr &= 0xFFFE;
-			dmi->addr_space = IPMI_IO_ADDR_SPACE;
-		} else
-			/* Memory */
-			dmi->addr_space = IPMI_MEM_ADDR_SPACE;
-
-		/* If bit 4 of byte 0x10 is set, then the lsb for the address
-		   is odd. */
-		dmi->base_addr = base_addr | ((data[0x10] & 0x10) >> 4);
-
-		dmi->irq = data[0x11];
-
-		/* The top two bits of byte 0x10 hold the register spacing. */
-		reg_spacing = (data[0x10] & 0xC0) >> 6;
-		switch (reg_spacing) {
-		case 0x00: /* Byte boundaries */
-		    dmi->offset = 1;
-		    break;
-		case 0x01: /* 32-bit boundaries */
-		    dmi->offset = 4;
-		    break;
-		case 0x02: /* 16-byte boundaries */
-		    dmi->offset = 16;
-		    break;
-		default:
-		    /* Some other interface, just ignore it. */
-		    return -EIO;
-		}
+	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
+	if (res) {
+		info->io_setup = port_setup;
+		info->io.addr_type = IPMI_IO_ADDR_SPACE;
 	} else {
-		/* Old DMI spec. */
-		/*
-		 * Note that technically, the lower bit of the base
-		 * address should be 1 if the address is I/O and 0 if
-		 * the address is in memory.  So many systems get that
-		 * wrong (and all that I have seen are I/O) so we just
-		 * ignore that bit and assume I/O.  Systems that use
-		 * memory should use the newer spec, anyway.
-		 */
-		dmi->base_addr = base_addr & 0xfffe;
-		dmi->addr_space = IPMI_IO_ADDR_SPACE;
-		dmi->offset = 1;
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (res) {
+			info->io_setup = mem_setup;
+			info->io.addr_type = IPMI_MEM_ADDR_SPACE;
+		}
 	}
+	if (!res) {
+		dev_err(&pdev->dev, "no I/O or memory address\n");
+		return NULL;
+	}
+	info->io.addr_data = res->start;
 
-	dmi->slave_addr = data[6];
+	info->io.regspacing = DEFAULT_REGSPACING;
+	res_second = platform_get_resource(pdev,
+			       (info->io.addr_type == IPMI_IO_ADDR_SPACE) ?
+					IORESOURCE_IO : IORESOURCE_MEM,
+			       1);
+	if (res_second) {
+		if (res_second->start > info->io.addr_data)
+			info->io.regspacing =
+				res_second->start - info->io.addr_data;
+	}
+	info->io.regsize = DEFAULT_REGSIZE;
+	info->io.regshift = 0;
 
-	return 0;
+	return res;
 }
 
-static void try_init_dmi(struct dmi_ipmi_data *ipmi_data)
+#endif
+
+#ifdef CONFIG_DMI
+static int dmi_ipmi_probe(struct platform_device *pdev)
 {
 	struct smi_info *info;
+	u8 type, slave_addr;
+	int rv;
+
+	if (!si_trydmi)
+		return -ENODEV;
+
+	rv = device_property_read_u8(&pdev->dev, "ipmi-type", &type);
+	if (rv)
+		return -ENODEV;
 
 	info = smi_info_alloc();
 	if (!info) {
 		pr_err(PFX "Could not allocate SI data\n");
-		return;
+		return -ENOMEM;
 	}
 
 	info->addr_source = SI_SMBIOS;
 	pr_info(PFX "probing via SMBIOS\n");
 
-	switch (ipmi_data->type) {
-	case 0x01: /* KCS */
+	switch (type) {
+	case IPMI_DMI_TYPE_KCS:
 		info->si_type = SI_KCS;
 		break;
-	case 0x02: /* SMIC */
+	case IPMI_DMI_TYPE_SMIC:
 		info->si_type = SI_SMIC;
 		break;
-	case 0x03: /* BT */
+	case IPMI_DMI_TYPE_BT:
 		info->si_type = SI_BT;
 		break;
 	default:
 		kfree(info);
-		return;
+		return -EINVAL;
 	}
 
-	switch (ipmi_data->addr_space) {
-	case IPMI_MEM_ADDR_SPACE:
-		info->io_setup = mem_setup;
-		info->io.addr_type = IPMI_MEM_ADDR_SPACE;
-		break;
-
-	case IPMI_IO_ADDR_SPACE:
-		info->io_setup = port_setup;
-		info->io.addr_type = IPMI_IO_ADDR_SPACE;
-		break;
-
-	default:
-		kfree(info);
-		pr_warn(PFX "Unknown SMBIOS I/O Address type: %d\n",
-			ipmi_data->addr_space);
-		return;
+	if (!ipmi_get_info_from_resources(pdev, info)) {
+		rv = -EINVAL;
+		goto err_free;
 	}
-	info->io.addr_data = ipmi_data->base_addr;
 
-	info->io.regspacing = ipmi_data->offset;
-	if (!info->io.regspacing)
-		info->io.regspacing = DEFAULT_REGSPACING;
-	info->io.regsize = DEFAULT_REGSPACING;
-	info->io.regshift = 0;
+	rv = device_property_read_u8(&pdev->dev, "slave-addr", &slave_addr);
+	if (rv) {
+		dev_warn(&pdev->dev, "device has no slave-addr property");
+		info->slave_addr = 0x20;
+	} else {
+		info->slave_addr = slave_addr;
+	}
 
-	info->slave_addr = ipmi_data->slave_addr;
-
-	info->irq = ipmi_data->irq;
-	if (info->irq)
+	info->irq = platform_get_irq(pdev, 0);
+	if (info->irq > 0)
 		info->irq_setup = std_irq_setup;
+	else
+		info->irq = 0;
+
+	info->dev = &pdev->dev;
 
 	pr_info("ipmi_si: SMBIOS: %s %#lx regsize %d spacing %d irq %d\n",
 		(info->io.addr_type == IPMI_IO_ADDR_SPACE) ? "io" : "mem",
@@ -2411,21 +2381,17 @@ static void try_init_dmi(struct dmi_ipmi_data *ipmi_data)
 
 	if (add_smi(info))
 		kfree(info);
+
+	return 0;
+
+err_free:
+	kfree(info);
+	return rv;
 }
-
-static void dmi_find_bmc(void)
+#else
+static int dmi_ipmi_probe(struct platform_device *pdev)
 {
-	const struct dmi_device *dev = NULL;
-	struct dmi_ipmi_data data;
-	int                  rv;
-
-	while ((dev = dmi_find_device(DMI_DEV_TYPE_IPMI, NULL, dev))) {
-		memset(&data, 0, sizeof(data));
-		rv = decode_dmi((const struct dmi_header *) dev->device_data,
-				&data);
-		if (!rv)
-			try_init_dmi(&data);
-	}
+	return -ENODEV;
 }
 #endif /* CONFIG_DMI */
 
@@ -2684,17 +2650,47 @@ static int of_ipmi_probe(struct platform_device *dev)
 #endif
 
 #ifdef CONFIG_ACPI
+/*
+ * Return @slave_addr if it is non-zero; otherwise, when DMI decoding is
+ * built in, look the address up in the DMI data by interface type, address
+ * space and base address.
+ */
+static int find_slave_address(struct smi_info *info, int slave_addr)
+{
+#ifdef CONFIG_IPMI_DMI_DECODE
+	int dmi_type = -1;
+	u32 res_flags;
+
+	/* An address supplied by the caller is used as-is. */
+	if (slave_addr)
+		return slave_addr;
+
+	/* Translate the SI interface type into its DMI counterpart. */
+	switch (info->si_type) {
+	case SI_KCS:
+		dmi_type = IPMI_DMI_TYPE_KCS;
+		break;
+	case SI_BT:
+		dmi_type = IPMI_DMI_TYPE_BT;
+		break;
+	case SI_SMIC:
+		dmi_type = IPMI_DMI_TYPE_SMIC;
+		break;
+	}
+
+	res_flags = (info->io.addr_type == IPMI_MEM_ADDR_SPACE) ?
+			IORESOURCE_MEM : IORESOURCE_IO;
+
+	return ipmi_dmi_get_slave_addr(dmi_type, res_flags,
+				       info->io.addr_data);
+#else
+	return slave_addr;
+#endif
+}
+
 static int acpi_ipmi_probe(struct platform_device *dev)
 {
 	struct smi_info *info;
-	struct resource *res, *res_second;
 	acpi_handle handle;
 	acpi_status status;
 	unsigned long long tmp;
+	struct resource *res;
 	int rv = -EINVAL;
 
 	if (!si_tryacpi)
-	       return 0;
+		return -ENODEV;
 
 	handle = ACPI_HANDLE(&dev->dev);
 	if (!handle)
@@ -2734,35 +2730,11 @@ static int acpi_ipmi_probe(struct platform_device *dev)
 		goto err_free;
 	}
 
-	res = platform_get_resource(dev, IORESOURCE_IO, 0);
-	if (res) {
-		info->io_setup = port_setup;
-		info->io.addr_type = IPMI_IO_ADDR_SPACE;
-	} else {
-		res = platform_get_resource(dev, IORESOURCE_MEM, 0);
-		if (res) {
-			info->io_setup = mem_setup;
-			info->io.addr_type = IPMI_MEM_ADDR_SPACE;
-		}
-	}
+	res = ipmi_get_info_from_resources(dev, info);
 	if (!res) {
-		dev_err(&dev->dev, "no I/O or memory address\n");
+		rv = -EINVAL;
 		goto err_free;
 	}
-	info->io.addr_data = res->start;
-
-	info->io.regspacing = DEFAULT_REGSPACING;
-	res_second = platform_get_resource(dev,
-			       (info->io.addr_type == IPMI_IO_ADDR_SPACE) ?
-					IORESOURCE_IO : IORESOURCE_MEM,
-			       1);
-	if (res_second) {
-		if (res_second->start > info->io.addr_data)
-			info->io.regspacing =
-				res_second->start - info->io.addr_data;
-	}
-	info->io.regsize = DEFAULT_REGSPACING;
-	info->io.regshift = 0;
 
 	/* If _GPE exists, use it; otherwise use standard interrupts */
 	status = acpi_evaluate_integer(handle, "_GPE", NULL, &tmp);
@@ -2778,6 +2750,8 @@ static int acpi_ipmi_probe(struct platform_device *dev)
 		}
 	}
 
+	info->slave_addr = find_slave_address(info, info->slave_addr);
+
 	info->dev = &dev->dev;
 	platform_set_drvdata(dev, info);
 
@@ -2813,7 +2787,10 @@ static int ipmi_probe(struct platform_device *dev)
 	if (of_ipmi_probe(dev) == 0)
 		return 0;
 
-	return acpi_ipmi_probe(dev);
+	if (acpi_ipmi_probe(dev) == 0)
+		return 0;
+
+	return dmi_ipmi_probe(dev);
 }
 
 static int ipmi_remove(struct platform_device *dev)
@@ -3786,11 +3763,6 @@ static int init_ipmi_si(void)
 	}
 #endif
 
-#ifdef CONFIG_DMI
-	if (si_trydmi)
-		dmi_find_bmc();
-#endif
-
 #ifdef CONFIG_ACPI
 	if (si_tryacpi)
 		spmi_find_bmc();
@@ -3938,6 +3910,7 @@ static void cleanup_ipmi_si(void)
 }
 module_exit(cleanup_ipmi_si);
 
+MODULE_ALIAS("platform:dmi-ipmi-si");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Corey Minyard <minyard@mvista.com>");
 MODULE_DESCRIPTION("Interface to the IPMI driver for the KCS, SMIC, and BT"

diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 0b22a9b..0aea3bc 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c

@@ -53,6 +53,7 @@
 #include <linux/acpi.h>
 #include <linux/ctype.h>
 #include <linux/time64.h>
+#include "ipmi_dmi.h"
 
 #define PFX "ipmi_ssif: "
 #define DEVICE_NAME "ipmi_ssif"
@@ -180,6 +181,8 @@ struct ssif_addr_info {
 	int slave_addr;
 	enum ipmi_addr_src addr_src;
 	union ipmi_smi_info_union addr_info;
+	struct device *dev;
+	struct i2c_client *client;
 
 	struct mutex clients_mutex;
 	struct list_head clients;
@@ -408,6 +411,7 @@ static void start_event_fetch(struct ssif_info *ssif_info, unsigned long *flags)
 	msg = ipmi_alloc_smi_msg();
 	if (!msg) {
 		ssif_info->ssif_state = SSIF_NORMAL;
+		ipmi_ssif_unlock_cond(ssif_info, flags);
 		return;
 	}
 
@@ -430,6 +434,7 @@ static void start_recv_msg_fetch(struct ssif_info *ssif_info,
 	msg = ipmi_alloc_smi_msg();
 	if (!msg) {
 		ssif_info->ssif_state = SSIF_NORMAL;
+		ipmi_ssif_unlock_cond(ssif_info, flags);
 		return;
 	}
 
@@ -761,6 +766,11 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
 			       result, len, data[2]);
 		} else if (data[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2
 			   || data[1] != IPMI_GET_MSG_FLAGS_CMD) {
+			/*
+			 * Don't abort here, maybe it was a queued
+			 * response to a previous command.
+			 */
+			ipmi_ssif_unlock_cond(ssif_info, flags);
 			pr_warn(PFX "Invalid response getting flags: %x %x\n",
 				data[0], data[1]);
 		} else {
@@ -1094,7 +1104,7 @@ static int inc_usecount(void *send_info)
 {
 	struct ssif_info *ssif_info = send_info;
 
-	if (!i2c_get_adapter(ssif_info->client->adapter->nr))
+	if (!i2c_get_adapter(i2c_adapter_id(ssif_info->client->adapter)))
 		return -ENODEV;
 
 	i2c_use_client(ssif_info->client);
@@ -1169,6 +1179,7 @@ static LIST_HEAD(ssif_infos);
 static int ssif_remove(struct i2c_client *client)
 {
 	struct ssif_info *ssif_info = i2c_get_clientdata(client);
+	struct ssif_addr_info *addr_info;
 	int rv;
 
 	if (!ssif_info)
@@ -1196,6 +1207,13 @@ static int ssif_remove(struct i2c_client *client)
 		kthread_stop(ssif_info->thread);
 	}
 
+	list_for_each_entry(addr_info, &ssif_infos, link) {
+		if (addr_info->client == client) {
+			addr_info->client = NULL;
+			break;
+		}
+	}
+
 	/*
 	 * No message can be outstanding now, we have removed the
 	 * upper layer and it permitted us to do so.
@@ -1404,28 +1422,13 @@ static bool check_acpi(struct ssif_info *ssif_info, struct device *dev)
 
 static int find_slave_address(struct i2c_client *client, int slave_addr)
 {
-	struct ssif_addr_info *info;
-
-	if (slave_addr)
-		return slave_addr;
-
-	/*
-	 * Came in without a slave address, search around to see if
-	 * the other sources have a slave address.  This lets us pick
-	 * up an SMBIOS slave address when using ACPI.
-	 */
-	list_for_each_entry(info, &ssif_infos, link) {
-		if (info->binfo.addr != client->addr)
-			continue;
-		if (info->adapter_name && client->adapter->name &&
-		    strcmp_nospace(info->adapter_name,
-				   client->adapter->name))
-			continue;
-		if (info->slave_addr) {
-			slave_addr = info->slave_addr;
-			break;
-		}
-	}
+#ifdef CONFIG_IPMI_DMI_DECODE
+	if (!slave_addr)
+		slave_addr = ipmi_dmi_get_slave_addr(
+			IPMI_DMI_TYPE_SSIF,
+			i2c_adapter_id(client->adapter),
+			client->addr);
+#endif
 
 	return slave_addr;
 }
@@ -1447,7 +1450,6 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	u8		  slave_addr = 0;
 	struct ssif_addr_info *addr_info = NULL;
 
-
 	resp = kmalloc(IPMI_MAX_MSG_LENGTH, GFP_KERNEL);
 	if (!resp)
 		return -ENOMEM;
@@ -1468,6 +1470,7 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
 			ssif_info->addr_source = addr_info->addr_src;
 			ssif_info->ssif_debug = addr_info->debug;
 			ssif_info->addr_info = addr_info->addr_info;
+			addr_info->client = client;
 			slave_addr = addr_info->slave_addr;
 		}
 	}
@@ -1664,7 +1667,8 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	{
 		unsigned int thread_num;
 
-		thread_num = ((ssif_info->client->adapter->nr << 8) |
+		thread_num = ((i2c_adapter_id(ssif_info->client->adapter)
+			       << 8) |
 			      ssif_info->client->addr);
 		init_completion(&ssif_info->wake_thread);
 		ssif_info->thread = kthread_run(ipmi_ssif_thread, ssif_info,
@@ -1705,8 +1709,19 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	}
 
  out:
-	if (rv)
+	if (rv) {
+		/*
+		 * Note that if addr_info->client is assigned, we
+		 * leave it.  The i2c client hangs around even if we
+		 * return a failure here, and the failure here is not
+		 * propagated back to the i2c code.  This seems to be
+		 * design intent, strange as it may be.  But if we
+		 * don't leave it, ssif_platform_remove will not remove
+		 * the client like it should.
+		 */
+		dev_err(&client->dev, "Unable to start IPMI SSIF: %d\n", rv);
 		kfree(ssif_info);
+	}
 	kfree(resp);
 	return rv;
 
@@ -1731,7 +1746,8 @@ static int ssif_adapter_handler(struct device *adev, void *opaque)
 
 static int new_ssif_client(int addr, char *adapter_name,
 			   int debug, int slave_addr,
-			   enum ipmi_addr_src addr_src)
+			   enum ipmi_addr_src addr_src,
+			   struct device *dev)
 {
 	struct ssif_addr_info *addr_info;
 	int rv = 0;
@@ -1764,6 +1780,10 @@ static int new_ssif_client(int addr, char *adapter_name,
 	addr_info->debug = debug;
 	addr_info->slave_addr = slave_addr;
 	addr_info->addr_src = addr_src;
+	addr_info->dev = dev;
+
+	if (dev)
+		dev_set_drvdata(dev, addr_info);
 
 	list_add_tail(&addr_info->link, &ssif_infos);
 
@@ -1902,7 +1922,7 @@ static int try_init_spmi(struct SPMITable *spmi)
 
 	myaddr = spmi->addr.address & 0x7f;
 
-	return new_ssif_client(myaddr, NULL, 0, 0, SI_SPMI);
+	return new_ssif_client(myaddr, NULL, 0, 0, SI_SPMI, NULL);
 }
 
 static void spmi_find_bmc(void)
@@ -1931,48 +1951,40 @@ static void spmi_find_bmc(void) { }
 #endif
 
 #ifdef CONFIG_DMI
-static int decode_dmi(const struct dmi_device *dmi_dev)
+static int dmi_ipmi_probe(struct platform_device *pdev)
 {
-	struct dmi_header *dm = dmi_dev->device_data;
-	u8             *data = (u8 *) dm;
-	u8             len = dm->length;
-	unsigned short myaddr;
-	int            slave_addr;
+	u8 type, slave_addr = 0;
+	u16 i2c_addr;
+	int rv;
 
-	if (num_addrs >= MAX_SSIF_BMCS)
-		return -1;
+	if (!ssif_trydmi)
+		return -ENODEV;
 
-	if (len < 9)
-		return -1;
+	rv = device_property_read_u8(&pdev->dev, "ipmi-type", &type);
+	if (rv)
+		return -ENODEV;
 
-	if (data[0x04] != 4) /* Not SSIF */
-		return -1;
+	if (type != IPMI_DMI_TYPE_SSIF)
+		return -ENODEV;
 
-	if ((data[8] >> 1) == 0) {
-		/*
-		 * Some broken systems put the I2C address in
-		 * the slave address field.  We try to
-		 * accommodate them here.
-		 */
-		myaddr = data[6] >> 1;
-		slave_addr = 0;
-	} else {
-		myaddr = data[8] >> 1;
-		slave_addr = data[6];
+	rv = device_property_read_u16(&pdev->dev, "i2c-addr", &i2c_addr);
+	if (rv) {
+		dev_warn(&pdev->dev, PFX "No i2c-addr property\n");
+		return -ENODEV;
 	}
 
-	return new_ssif_client(myaddr, NULL, 0, slave_addr, SI_SMBIOS);
-}
+	rv = device_property_read_u8(&pdev->dev, "slave-addr", &slave_addr);
+	if (rv)
+		dev_warn(&pdev->dev, "device has no slave-addr property");
 
-static void dmi_iterator(void)
-{
-	const struct dmi_device *dev = NULL;
-
-	while ((dev = dmi_find_device(DMI_DEV_TYPE_IPMI, NULL, dev)))
-		decode_dmi(dev);
+	return new_ssif_client(i2c_addr, NULL, 0,
+			       slave_addr, SI_SMBIOS, &pdev->dev);
 }
 #else
-static void dmi_iterator(void) { }
+static int dmi_ipmi_probe(struct platform_device *pdev)
+{
+	return -ENODEV;
+}
 #endif
 
 static const struct i2c_device_id ssif_id[] = {
@@ -1993,6 +2005,36 @@ static struct i2c_driver ssif_i2c_driver = {
 	.detect		= ssif_detect
 };
 
+static int ssif_platform_probe(struct platform_device *dev)
+{
+	return dmi_ipmi_probe(dev);
+}
+
+static int ssif_platform_remove(struct platform_device *dev)
+{
+	struct ssif_addr_info *addr_info = dev_get_drvdata(&dev->dev);
+
+	if (!addr_info)
+		return 0;
+
+	mutex_lock(&ssif_infos_mutex);
+	if (addr_info->client)
+		i2c_unregister_device(addr_info->client);
+
+	list_del(&addr_info->link);
+	kfree(addr_info);
+	mutex_unlock(&ssif_infos_mutex);
+	return 0;
+}
+
+static struct platform_driver ipmi_driver = {
+	.driver = {
+		.name = DEVICE_NAME,
+	},
+	.probe		= ssif_platform_probe,
+	.remove		= ssif_platform_remove,
+};
+
 static int init_ipmi_ssif(void)
 {
 	int i;
@@ -2007,7 +2049,7 @@ static int init_ipmi_ssif(void)
 	for (i = 0; i < num_addrs; i++) {
 		rv = new_ssif_client(addr[i], adapter_name[i],
 				     dbg[i], slave_addrs[i],
-				     SI_HARDCODED);
+				     SI_HARDCODED, NULL);
 		if (rv)
 			pr_err(PFX
 			       "Couldn't add hardcoded device at addr 0x%x\n",
@@ -2017,11 +2059,16 @@ static int init_ipmi_ssif(void)
 	if (ssif_tryacpi)
 		ssif_i2c_driver.driver.acpi_match_table	=
 			ACPI_PTR(ssif_acpi_match);
-	if (ssif_trydmi)
-		dmi_iterator();
+
 	if (ssif_tryacpi)
 		spmi_find_bmc();
 
+	if (ssif_trydmi) {
+		rv = platform_driver_register(&ipmi_driver);
+		if (rv)
+			pr_err(PFX "Unable to register driver: %d\n", rv);
+	}
+
 	ssif_i2c_driver.address_list = ssif_address_list();
 
 	rv = i2c_add_driver(&ssif_i2c_driver);
@@ -2041,10 +2088,13 @@ static void cleanup_ipmi_ssif(void)
 
 	i2c_del_driver(&ssif_i2c_driver);
 
+	platform_driver_unregister(&ipmi_driver);
+
 	free_ssif_clients();
 }
 module_exit(cleanup_ipmi_ssif);
 
+MODULE_ALIAS("platform:dmi-ipmi-ssif");
 MODULE_AUTHOR("Todd C Davis <todd.c.davis@intel.com>, Corey Minyard <minyard@acm.org>");
 MODULE_DESCRIPTION("IPMI driver for management controllers on a SMBus");
 MODULE_LICENSE("GPL");

diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index a5c6cfe..3d832d0 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c

@@ -1163,10 +1163,11 @@ static int wdog_reboot_handler(struct notifier_block *this,
 			ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
 			ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
 		} else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
-			/* Set a long timer to let the reboot happens, but
-			   reboot if it hangs, but only if the watchdog
+			/* Set a long timer to let the reboot happen or
+			   reset if it hangs, but only if the watchdog
 			   timer was already running. */
-			timeout = 120;
+			if (timeout < 120)
+				timeout = 120;
 			pretimeout = 0;
 			ipmi_watchdog_state = WDOG_TIMEOUT_RESET;
 			ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 01a260f..23cab7a 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c

@@ -987,6 +987,11 @@ void add_device_randomness(const void *buf, unsigned int size)
 	unsigned long time = random_get_entropy() ^ jiffies;
 	unsigned long flags;
 
+	if (!crng_ready()) {
+		crng_fast_load(buf, size);
+		return;
+	}
+
 	trace_add_device_randomness(size, _RET_IP_);
 	spin_lock_irqsave(&input_pool.lock, flags);
 	_mix_pool_bytes(&input_pool, buf, size);

diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index 4180422..ea6d625 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c

@@ -540,7 +540,7 @@ static void bL_cpufreq_ready(struct cpufreq_policy *policy)
 				     &power_coefficient);
 
 		cdev[cur_cluster] = of_cpufreq_power_cooling_register(np,
-				policy->related_cpus, power_coefficient, NULL);
+				policy, power_coefficient, NULL);
 		if (IS_ERR(cdev[cur_cluster])) {
 			dev_err(cpu_dev,
 				"running cpufreq without cooling device: %ld\n",

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index c943787..fef3c21 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c

@@ -326,7 +326,7 @@ static void cpufreq_ready(struct cpufreq_policy *policy)
 				     &power_coefficient);
 
 		priv->cdev = of_cpufreq_power_cooling_register(np,
-				policy->related_cpus, power_coefficient, NULL);
+				policy, power_coefficient, NULL);
 		if (IS_ERR(priv->cdev)) {
 			dev_err(priv->cpu_dev,
 				"running cpufreq without cooling device: %ld\n",

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index f570ead..e75880e 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c

@@ -135,7 +135,7 @@ static struct attribute *default_attrs[] = {
 	&trans_table.attr,
 	NULL
 };
-static struct attribute_group stats_attr_group = {
+static const struct attribute_group stats_attr_group = {
 	.attrs = default_attrs,
 	.name = "stats"
 };
@@ -170,11 +170,10 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 	unsigned int i = 0, count = 0, ret = -ENOMEM;
 	struct cpufreq_stats *stats;
 	unsigned int alloc_size;
-	struct cpufreq_frequency_table *pos, *table;
+	struct cpufreq_frequency_table *pos;
 
-	/* We need cpufreq table for creating stats table */
-	table = policy->freq_table;
-	if (unlikely(!table))
+	count = cpufreq_table_count_valid_entries(policy);
+	if (!count)
 		return;
 
 	/* stats already initialized */
@@ -185,10 +184,6 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 	if (!stats)
 		return;
 
-	/* Find total allocation size */
-	cpufreq_for_each_valid_entry(pos, table)
-		count++;
-
 	alloc_size = count * sizeof(int) + count * sizeof(u64);
 
 	alloc_size += count * count * sizeof(int);
@@ -205,7 +200,7 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 	stats->max_state = count;
 
 	/* Find valid-unique entries */
-	cpufreq_for_each_valid_entry(pos, table)
+	cpufreq_for_each_valid_entry(pos, policy->freq_table)
 		if (freq_table_get_index(stats, pos->frequency) == -1)
 			stats->freq_table[i++] = pos->frequency;
 

diff --git a/drivers/cpufreq/dbx500-cpufreq.c b/drivers/cpufreq/dbx500-cpufreq.c
index 3575b82..4ee0431 100644
--- a/drivers/cpufreq/dbx500-cpufreq.c
+++ b/drivers/cpufreq/dbx500-cpufreq.c

@@ -43,7 +43,7 @@ static int dbx500_cpufreq_exit(struct cpufreq_policy *policy)
 
 static void dbx500_cpufreq_ready(struct cpufreq_policy *policy)
 {
-	cdev = cpufreq_cooling_register(policy->cpus);
+	cdev = cpufreq_cooling_register(policy);
 	if (IS_ERR(cdev))
 		pr_err("Failed to register cooling device %ld\n", PTR_ERR(cdev));
 	else

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 48a98f1..d6f3235 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c

@@ -1214,7 +1214,7 @@ static struct attribute *intel_pstate_attributes[] = {
 	NULL
 };
 
-static struct attribute_group intel_pstate_attr_group = {
+static const struct attribute_group intel_pstate_attr_group = {
 	.attrs = intel_pstate_attributes,
 };
 

diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c
index fd1886f..f9f00fb 100644
--- a/drivers/cpufreq/mt8173-cpufreq.c
+++ b/drivers/cpufreq/mt8173-cpufreq.c

@@ -320,9 +320,7 @@ static void mtk_cpufreq_ready(struct cpufreq_policy *policy)
 		of_property_read_u32(np, DYNAMIC_POWER, &capacitance);
 
 		info->cdev = of_cpufreq_power_cooling_register(np,
-						policy->related_cpus,
-						capacitance,
-						NULL);
+						policy, capacitance, NULL);
 
 		if (IS_ERR(info->cdev)) {
 			dev_err(info->cpu_dev,

diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index e2ea433..4ada55b 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c

@@ -278,8 +278,7 @@ static void qoriq_cpufreq_ready(struct cpufreq_policy *policy)
 	struct device_node *np = of_get_cpu_node(policy->cpu, NULL);
 
 	if (of_find_property(np, "#cooling-cells", NULL)) {
-		cpud->cdev = of_cpufreq_cooling_register(np,
-							 policy->related_cpus);
+		cpud->cdev = of_cpufreq_cooling_register(np, policy);
 
 		if (IS_ERR(cpud->cdev) && PTR_ERR(cpud->cdev) != -ENOSYS) {
 			pr_err("cpu%d is not running as cooling device: %ld\n",

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index f742596..37e24f5 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile

@@ -17,6 +17,7 @@
 cflags-$(CONFIG_EFI_ARMSTUB)	+= -I$(srctree)/scripts/dtc/libfdt
 
 KBUILD_CFLAGS			:= $(cflags-y) -DDISABLE_BRANCH_PROFILING \
+				   -D__NO_FORTIFY \
 				   $(call cc-option,-ffreestanding) \
 				   $(call cc-option,-fno-stack-protector)
 

diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c
index 99f9a4b..67fe19e 100644
--- a/drivers/gpu/drm/bridge/panel.c
+++ b/drivers/gpu/drm/bridge/panel.c

@@ -161,7 +161,7 @@ struct drm_bridge *drm_panel_bridge_add(struct drm_panel *panel,
 	int ret;
 
 	if (!panel)
-		return ERR_PTR(EINVAL);
+		return ERR_PTR(-EINVAL);
 
 	panel_bridge = devm_kzalloc(panel->dev, sizeof(*panel_bridge),
 				    GFP_KERNEL);

diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index fc8ef42..b3ef4f1 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c

@@ -832,6 +832,7 @@ static int atomic_remove_fb(struct drm_framebuffer *fb)
 		drm_atomic_clean_old_fb(dev, plane_mask, ret);
 
 	if (ret == -EDEADLK) {
+		drm_atomic_state_clear(state);
 		drm_modeset_backoff(&ctx);
 		goto retry;
 	}

diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c
index 0b2d8c4..d1f2028 100644
--- a/drivers/gpu/drm/drm_ioc32.c
+++ b/drivers/gpu/drm/drm_ioc32.c

@@ -112,6 +112,9 @@ static int compat_drm_version(struct file *file, unsigned int cmd,
 	v32.version_major = v.version_major;
 	v32.version_minor = v.version_minor;
 	v32.version_patchlevel = v.version_patchlevel;
+	v32.name_len = v.name_len;
+	v32.date_len = v.date_len;
+	v32.desc_len = v.desc_len;
 	if (copy_to_user((void __user *)arg, &v32, sizeof(v32)))
 		return -EFAULT;
 	return 0;

diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c
index 463e4d8..e9f33cd 100644
--- a/drivers/gpu/drm/drm_vblank.c
+++ b/drivers/gpu/drm/drm_vblank.c

@@ -242,7 +242,7 @@ static void drm_update_vblank_count(struct drm_device *dev, unsigned int pipe,
 	 * Otherwise reinitialize delayed at next vblank interrupt and assign 0
 	 * for now, to mark the vblanktimestamp as invalid.
 	 */
-	if (!rc && in_vblank_irq)
+	if (!rc && !in_vblank_irq)
 		t_vblank = (struct timeval) {0, 0};
 
 	store_vblank(dev, pipe, diff, &t_vblank, cur_vblank);

diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 51241de5..713848c 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c

@@ -2536,6 +2536,11 @@ static int scan_workload(struct intel_vgpu_workload *workload)
 		gma_head == gma_tail)
 		return 0;
 
+	if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	ret = ip_gma_set(&s, gma_head);
 	if (ret)
 		goto out;
@@ -2579,6 +2584,11 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	s.rb_va = wa_ctx->indirect_ctx.shadow_va;
 	s.workload = workload;
 
+	if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	ret = ip_gma_set(&s, gma_head);
 	if (ret)
 		goto out;

diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index e0261fc..2deb05f 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c

@@ -197,6 +197,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
 			(PORT_B << TRANS_DDI_PORT_SHIFT) |
 			TRANS_DDI_FUNC_ENABLE);
+		if (IS_BROADWELL(dev_priv)) {
+			vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_B)) &=
+				~PORT_CLK_SEL_MASK;
+			vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_B)) |=
+				PORT_CLK_SEL_LCPLL_810;
+		}
 		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) |= DDI_BUF_CTL_ENABLE;
 		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) &= ~DDI_BUF_IS_IDLE;
 		vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT;
@@ -211,6 +217,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
 			(PORT_C << TRANS_DDI_PORT_SHIFT) |
 			TRANS_DDI_FUNC_ENABLE);
+		if (IS_BROADWELL(dev_priv)) {
+			vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_C)) &=
+				~PORT_CLK_SEL_MASK;
+			vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_C)) |=
+				PORT_CLK_SEL_LCPLL_810;
+		}
 		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) |= DDI_BUF_CTL_ENABLE;
 		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) &= ~DDI_BUF_IS_IDLE;
 		vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED;
@@ -225,6 +237,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
 			(PORT_D << TRANS_DDI_PORT_SHIFT) |
 			TRANS_DDI_FUNC_ENABLE);
+		if (IS_BROADWELL(dev_priv)) {
+			vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_D)) &=
+				~PORT_CLK_SEL_MASK;
+			vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_D)) |=
+				PORT_CLK_SEL_LCPLL_810;
+		}
 		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) |= DDI_BUF_CTL_ENABLE;
 		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) &= ~DDI_BUF_IS_IDLE;
 		vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED;
@@ -244,6 +262,10 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 
 		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED;
 	}
+
+	/* Clear host CRT status, so guest couldn't detect this host CRT. */
+	if (IS_BROADWELL(dev_priv))
+		vgpu_vreg(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK;
 }
 
 static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num)

diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 66374db..6166e34 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c

@@ -2259,6 +2259,8 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt)
 		ret = setup_spt_oos(gvt);
 		if (ret) {
 			gvt_err("fail to initialize SPT oos\n");
+			dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
+			__free_page(gvt->gtt.scratch_ggtt_page);
 			return ret;
 		}
 	}

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 1414d7e..17febe8 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c

@@ -367,21 +367,24 @@ static int lcpll_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 static int dpy_reg_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
 		void *p_data, unsigned int bytes)
 {
-	*(u32 *)p_data = (1 << 17);
-	return 0;
-}
+	switch (offset) {
+	case 0xe651c:
+	case 0xe661c:
+	case 0xe671c:
+	case 0xe681c:
+		vgpu_vreg(vgpu, offset) = 1 << 17;
+		break;
+	case 0xe6c04:
+		vgpu_vreg(vgpu, offset) = 0x3;
+		break;
+	case 0xe6e1c:
+		vgpu_vreg(vgpu, offset) = 0x2f << 16;
+		break;
+	default:
+		return -EINVAL;
+	}
 
-static int dpy_reg_mmio_read_2(struct intel_vgpu *vgpu, unsigned int offset,
-		void *p_data, unsigned int bytes)
-{
-	*(u32 *)p_data = 3;
-	return 0;
-}
-
-static int dpy_reg_mmio_read_3(struct intel_vgpu *vgpu, unsigned int offset,
-		void *p_data, unsigned int bytes)
-{
-	*(u32 *)p_data = (0x2f << 16);
+	read_vreg(vgpu, offset, p_data, bytes);
 	return 0;
 }
 
@@ -1925,7 +1928,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_F(_PCH_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_PRE_SKL, NULL,
 		dp_aux_ch_ctl_mmio_write);
 
-	MMIO_RO(PCH_ADPA, D_ALL, 0, ADPA_CRT_HOTPLUG_MONITOR_MASK, NULL, pch_adpa_mmio_write);
+	MMIO_DH(PCH_ADPA, D_PRE_SKL, NULL, pch_adpa_mmio_write);
 
 	MMIO_DH(_PCH_TRANSACONF, D_ALL, NULL, transconf_mmio_write);
 	MMIO_DH(_PCH_TRANSBCONF, D_ALL, NULL, transconf_mmio_write);
@@ -2011,8 +2014,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_DH(0xe661c, D_ALL, dpy_reg_mmio_read, NULL);
 	MMIO_DH(0xe671c, D_ALL, dpy_reg_mmio_read, NULL);
 	MMIO_DH(0xe681c, D_ALL, dpy_reg_mmio_read, NULL);
-	MMIO_DH(0xe6c04, D_ALL, dpy_reg_mmio_read_2, NULL);
-	MMIO_DH(0xe6e1c, D_ALL, dpy_reg_mmio_read_3, NULL);
+	MMIO_DH(0xe6c04, D_ALL, dpy_reg_mmio_read, NULL);
+	MMIO_DH(0xe6e1c, D_ALL, dpy_reg_mmio_read, NULL);
 
 	MMIO_RO(PCH_PORT_HOTPLUG, D_ALL, 0,
 		PORTA_HOTPLUG_STATUS_MASK

diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 1ae0b40..fd0c85f 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c

@@ -232,16 +232,20 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu)
 	struct device *dev = mdev_dev(vgpu->vdev.mdev);
 	unsigned long gfn;
 
-	mutex_lock(&vgpu->vdev.cache_lock);
-	while ((node = rb_first(&vgpu->vdev.cache))) {
+	for (;;) {
+		mutex_lock(&vgpu->vdev.cache_lock);
+		node = rb_first(&vgpu->vdev.cache);
+		if (!node) {
+			mutex_unlock(&vgpu->vdev.cache_lock);
+			break;
+		}
 		dma = rb_entry(node, struct gvt_dma, node);
 		gvt_dma_unmap_iova(vgpu, dma->iova);
 		gfn = dma->gfn;
-
-		vfio_unpin_pages(dev, &gfn, 1);
 		__gvt_cache_remove_entry(vgpu, dma);
+		mutex_unlock(&vgpu->vdev.cache_lock);
+		vfio_unpin_pages(dev, &gfn, 1);
 	}
-	mutex_unlock(&vgpu->vdev.cache_lock);
 }
 
 static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 488fdea..4f7057d 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c

@@ -174,15 +174,6 @@ static int shadow_context_status_change(struct notifier_block *nb,
 		atomic_set(&workload->shadow_ctx_active, 1);
 		break;
 	case INTEL_CONTEXT_SCHEDULE_OUT:
-		/* If the status is -EINPROGRESS means this workload
-		 * doesn't meet any issue during dispatching so when
-		 * get the SCHEDULE_OUT set the status to be zero for
-		 * good. If the status is NOT -EINPROGRESS means there
-		 * is something wrong happened during dispatching and
-		 * the status should not be set to zero
-		 */
-		if (workload->status == -EINPROGRESS)
-			workload->status = 0;
 		atomic_set(&workload->shadow_ctx_active, 0);
 		break;
 	default:
@@ -427,6 +418,18 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 		wait_event(workload->shadow_ctx_status_wq,
 			   !atomic_read(&workload->shadow_ctx_active));
 
+		/* If this request caused GPU hang, req->fence.error will
+		 * be set to -EIO. Use -EIO to set workload status so
+		 * that when this request caused GPU hang, didn't trigger
+		 * context switch interrupt to guest.
+		 */
+		if (likely(workload->status == -EINPROGRESS)) {
+			if (workload->req->fence.error == -EIO)
+				workload->status = -EIO;
+			else
+				workload->status = 0;
+		}
+
 		i915_gem_request_put(fetch_and_zero(&workload->req));
 
 		if (!workload->status && !vgpu->resetting) {
@@ -464,8 +467,6 @@ struct workload_thread_param {
 	int ring_id;
 };
 
-static DEFINE_MUTEX(scheduler_mutex);
-
 static int workload_thread(void *priv)
 {
 	struct workload_thread_param *p = (struct workload_thread_param *)priv;
@@ -497,8 +498,6 @@ static int workload_thread(void *priv)
 		if (!workload)
 			break;
 
-		mutex_lock(&scheduler_mutex);
-
 		gvt_dbg_sched("ring id %d next workload %p vgpu %d\n",
 				workload->ring_id, workload,
 				workload->vgpu->id);
@@ -537,9 +536,6 @@ static int workload_thread(void *priv)
 					FORCEWAKE_ALL);
 
 		intel_runtime_pm_put(gvt->dev_priv);
-
-		mutex_unlock(&scheduler_mutex);
-
 	}
 	return 0;
 }

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3f44076..00d8967 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c

@@ -3087,7 +3087,7 @@ static void intel_connector_info(struct seq_file *m,
 			   connector->display_info.cea_rev);
 	}
 
-	if (!intel_encoder || intel_encoder->type == INTEL_OUTPUT_DP_MST)
+	if (!intel_encoder)
 		return;
 
 	switch (connector->connector_type) {

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ee2325b..fc307e03 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c

@@ -1132,10 +1132,12 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 	 * and the registers being closely associated.
 	 *
 	 * According to chipset errata, on the 965GM, MSI interrupts may
-	 * be lost or delayed, but we use them anyways to avoid
-	 * stuck interrupts on some machines.
+	 * be lost or delayed, and was defeatured. MSI interrupts seem to
+	 * get lost on g4x as well, and interrupt delivery seems to stay
+	 * properly dead afterwards. So we'll just disable them for all
+	 * pre-gen5 chipsets.
 	 */
-	if (!IS_I945G(dev_priv) && !IS_I945GM(dev_priv)) {
+	if (INTEL_GEN(dev_priv) >= 5) {
 		if (pci_enable_msi(pdev) < 0)
 			DRM_DEBUG_DRIVER("can't enable MSI");
 	}

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7dcac3b..969bac8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c

@@ -2434,8 +2434,9 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 				 * again with !__GFP_NORETRY. However, we still
 				 * want to fail this allocation rather than
 				 * trigger the out-of-memory killer and for
-				 * this we want the future __GFP_MAYFAIL.
+				 * this we want __GFP_RETRY_MAYFAIL.
 				 */
+				gfp |= __GFP_RETRY_MAYFAIL;
 			}
 		} while (1);
 

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9337446..054b2e5 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c

@@ -288,20 +288,26 @@ static int eb_create(struct i915_execbuffer *eb)
 		 * direct lookup.
 		 */
 		do {
+			unsigned int flags;
+
+			/* While we can still reduce the allocation size, don't
+			 * raise a warning and allow the allocation to fail.
+			 * On the last pass though, we want to try as hard
+			 * as possible to perform the allocation and warn
+			 * if it fails.
+			 */
+			flags = GFP_TEMPORARY;
+			if (size > 1)
+				flags |= __GFP_NORETRY | __GFP_NOWARN;
+
 			eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
-					      GFP_TEMPORARY |
-					      __GFP_NORETRY |
-					      __GFP_NOWARN);
+					      flags);
 			if (eb->buckets)
 				break;
 		} while (--size);
 
-		if (unlikely(!eb->buckets)) {
-			eb->buckets = kzalloc(sizeof(struct hlist_head),
-					      GFP_TEMPORARY);
-			if (unlikely(!eb->buckets))
-				return -ENOMEM;
-		}
+		if (unlikely(!size))
+			return -ENOMEM;
 
 		eb->lut_size = size;
 	} else {
@@ -452,7 +458,7 @@ eb_add_vma(struct i915_execbuffer *eb,
 			return err;
 	}
 
-	if (eb->lut_size >= 0) {
+	if (eb->lut_size > 0) {
 		vma->exec_handle = entry->handle;
 		hlist_add_head(&vma->exec_node,
 			       &eb->buckets[hash_32(entry->handle,
@@ -894,7 +900,7 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
 static void eb_reset_vmas(const struct i915_execbuffer *eb)
 {
 	eb_release_vmas(eb);
-	if (eb->lut_size >= 0)
+	if (eb->lut_size > 0)
 		memset(eb->buckets, 0,
 		       sizeof(struct hlist_head) << eb->lut_size);
 }
@@ -903,7 +909,7 @@ static void eb_destroy(const struct i915_execbuffer *eb)
 {
 	GEM_BUG_ON(eb->reloc_cache.rq);
 
-	if (eb->lut_size >= 0)
+	if (eb->lut_size > 0)
 		kfree(eb->buckets);
 }
 
@@ -2180,8 +2186,11 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		}
 	}
 
-	if (eb_create(&eb))
-		return -ENOMEM;
+	err = eb_create(&eb);
+	if (err)
+		goto err_out_fence;
+
+	GEM_BUG_ON(!eb.lut_size);
 
 	/*
 	 * Take a local wakeref for preparing to dispatch the execbuf as
@@ -2340,6 +2349,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_rpm:
 	intel_runtime_pm_put(eb.i915);
 	eb_destroy(&eb);
+err_out_fence:
 	if (out_fence_fd != -1)
 		put_unused_fd(out_fence_fd);
 err_in_fence:

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 38c4440..9cd22f8 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c

@@ -2067,10 +2067,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 			return ret;
 	}
 
-	ret = alloc_oa_buffer(dev_priv);
-	if (ret)
-		goto err_oa_buf_alloc;
-
 	/* PRM - observability performance counters:
 	 *
 	 *   OACONTROL, performance counter enable, note:
@@ -2086,6 +2082,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	intel_runtime_pm_get(dev_priv);
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
+	ret = alloc_oa_buffer(dev_priv);
+	if (ret)
+		goto err_oa_buf_alloc;
+
 	ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv);
 	if (ret)
 		goto err_enable;
@@ -2097,11 +2097,11 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	return 0;
 
 err_enable:
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-	intel_runtime_pm_put(dev_priv);
 	free_oa_buffer(dev_priv);
 
 err_oa_buf_alloc:
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+	intel_runtime_pm_put(dev_priv);
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c8647cf..64cc674 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h

@@ -1802,7 +1802,7 @@ enum skl_disp_power_wells {
 #define   POST_CURSOR_2(x)		((x) << 6)
 #define   POST_CURSOR_2_MASK		(0x3F << 6)
 #define   CURSOR_COEFF(x)		((x) << 0)
-#define   CURSOR_COEFF_MASK		(0x3F << 6)
+#define   CURSOR_COEFF_MASK		(0x3F << 0)
 
 #define _CNL_PORT_TX_DW5_GRP_AE		0x162354
 #define _CNL_PORT_TX_DW5_GRP_B		0x1623D4

diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index b8914db..1241e58 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c

@@ -491,6 +491,14 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 	int cdclk = cdclk_state->cdclk;
 	u32 val, cmd;
 
+	/* There are cases where we can end up here with power domains
+	 * off and a CDCLK frequency other than the minimum, like when
+	 * issuing a modeset without actually changing any display after
+	 * a system suspend.  So grab the PIPE-A domain, which covers
+	 * the HW blocks needed for the following programming.
+	 */
+	intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
+
 	if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */
 		cmd = 2;
 	else if (cdclk == 266667)
@@ -549,6 +557,8 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 	intel_update_cdclk(dev_priv);
 
 	vlv_program_pfi_credits(dev_priv);
+
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A);
 }
 
 static void chv_set_cdclk(struct drm_i915_private *dev_priv,
@@ -568,6 +578,14 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 		return;
 	}
 
+	/* There are cases where we can end up here with power domains
+	 * off and a CDCLK frequency other than the minimum, like when
+	 * issuing a modeset without actually changing any display after
+	 * a system suspend.  So grab the PIPE-A domain, which covers
+	 * the HW blocks needed for the following programming.
+	 */
+	intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
+
 	/*
 	 * Specs are full of misinformation, but testing on actual
 	 * hardware has shown that we just need to write the desired
@@ -590,6 +608,8 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 	intel_update_cdclk(dev_priv);
 
 	vlv_program_pfi_credits(dev_priv);
+
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A);
 }
 
 static int bdw_calc_cdclk(int max_pixclk)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index a4487c5..5b4de71 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c

@@ -821,9 +821,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
 	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
 		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
 
-	/* WaDisableKillLogic:bxt,skl,kbl,cfl */
-	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-		   ECOCHK_DIS_TLB);
+	/* WaDisableKillLogic:bxt,skl,kbl */
+	if (!IS_COFFEELAKE(dev_priv))
+		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
+			   ECOCHK_DIS_TLB);
 
 	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
 	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
@@ -894,10 +895,9 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
 			  HDC_FORCE_NON_COHERENT);
 
-	/* WaDisableHDCInvalidation:skl,bxt,kbl */
-	if (!IS_COFFEELAKE(dev_priv))
-		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-			   BDW_DISABLE_HDC_INVALIDATION);
+	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
+	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
+		   BDW_DISABLE_HDC_INVALIDATION);
 
 	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
 	if (IS_SKYLAKE(dev_priv) ||

diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 03347c6..0c4cde6 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c

@@ -535,14 +535,15 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev)
 
 	drm_fb_helper_fini(&ifbdev->helper);
 
-	if (ifbdev->fb) {
+	if (ifbdev->vma) {
 		mutex_lock(&ifbdev->helper.dev->struct_mutex);
 		intel_unpin_fb_vma(ifbdev->vma);
 		mutex_unlock(&ifbdev->helper.dev->struct_mutex);
-
-		drm_framebuffer_remove(&ifbdev->fb->base);
 	}
 
+	if (ifbdev->fb)
+		drm_framebuffer_remove(&ifbdev->fb->base);
+
 	kfree(ifbdev);
 }
 
@@ -765,7 +766,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous
 	struct intel_fbdev *ifbdev = dev_priv->fbdev;
 	struct fb_info *info;
 
-	if (!ifbdev || !ifbdev->fb)
+	if (!ifbdev || !ifbdev->vma)
 		return;
 
 	info = ifbdev->helper.fbdev;
@@ -812,7 +813,7 @@ void intel_fbdev_output_poll_changed(struct drm_device *dev)
 {
 	struct intel_fbdev *ifbdev = to_i915(dev)->fbdev;
 
-	if (ifbdev && ifbdev->fb)
+	if (ifbdev && ifbdev->vma)
 		drm_fb_helper_hotplug_event(&ifbdev->helper);
 }
 
@@ -824,7 +825,7 @@ void intel_fbdev_restore_mode(struct drm_device *dev)
 		return;
 
 	intel_fbdev_sync(ifbdev);
-	if (!ifbdev->fb)
+	if (!ifbdev->vma)
 		return;
 
 	if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper) == 0)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
index d15cc9d..89dc25a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c

@@ -246,9 +246,9 @@ static int igt_dmabuf_export_vmap(void *arg)
 	i915_gem_object_put(obj);
 
 	ptr = dma_buf_vmap(dmabuf);
-	if (IS_ERR(ptr)) {
-		err = PTR_ERR(ptr);
-		pr_err("dma_buf_vmap failed with err=%d\n", err);
+	if (!ptr) {
+		pr_err("dma_buf_vmap failed\n");
+		err = -ENOMEM;
 		goto out;
 	}
 

diff --git a/drivers/gpu/drm/mediatek/Makefile b/drivers/gpu/drm/mediatek/Makefile
index bf2e5be..e37b55a 100644
--- a/drivers/gpu/drm/mediatek/Makefile
+++ b/drivers/gpu/drm/mediatek/Makefile

@@ -1,4 +1,5 @@
-mediatek-drm-y := mtk_disp_ovl.o \
+mediatek-drm-y := mtk_disp_color.o \
+		  mtk_disp_ovl.o \
 		  mtk_disp_rdma.o \
 		  mtk_drm_crtc.o \
 		  mtk_drm_ddp.o \

diff --git a/drivers/gpu/drm/mediatek/mtk_disp_color.c b/drivers/gpu/drm/mediatek/mtk_disp_color.c
new file mode 100644
index 0000000..ef79a6d
--- /dev/null
+++ b/drivers/gpu/drm/mediatek/mtk_disp_color.c

@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2017 MediaTek Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <drm/drmP.h>
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+
+#include "mtk_drm_crtc.h"
+#include "mtk_drm_ddp_comp.h"
+
+#define DISP_COLOR_CFG_MAIN			0x0400
+#define DISP_COLOR_START_MT2701			0x0f00
+#define DISP_COLOR_START_MT8173			0x0c00
+#define DISP_COLOR_START(comp)			((comp)->data->color_offset)
+#define DISP_COLOR_WIDTH(comp)			(DISP_COLOR_START(comp) + 0x50)
+#define DISP_COLOR_HEIGHT(comp)			(DISP_COLOR_START(comp) + 0x54)
+
+#define COLOR_BYPASS_ALL			BIT(7)
+#define COLOR_SEQ_SEL				BIT(13)
+
+struct mtk_disp_color_data {
+	unsigned int color_offset;
+};
+
+/**
+ * struct mtk_disp_color - DISP_COLOR driver structure
+ * @ddp_comp: structure containing type enum and hardware resources
+ * @crtc: associated crtc to report irq events to
+ */
+struct mtk_disp_color {
+	struct mtk_ddp_comp			ddp_comp;
+	struct drm_crtc				*crtc;
+	const struct mtk_disp_color_data	*data;
+};
+
+static inline struct mtk_disp_color *comp_to_color(struct mtk_ddp_comp *comp)
+{
+	return container_of(comp, struct mtk_disp_color, ddp_comp);
+}
+
+static void mtk_color_config(struct mtk_ddp_comp *comp, unsigned int w,
+			     unsigned int h, unsigned int vrefresh,
+			     unsigned int bpc)
+{
+	struct mtk_disp_color *color = comp_to_color(comp);
+
+	writel(w, comp->regs + DISP_COLOR_WIDTH(color));
+	writel(h, comp->regs + DISP_COLOR_HEIGHT(color));
+}
+
+static void mtk_color_start(struct mtk_ddp_comp *comp)
+{
+	struct mtk_disp_color *color = comp_to_color(comp);
+
+	writel(COLOR_BYPASS_ALL | COLOR_SEQ_SEL,
+	       comp->regs + DISP_COLOR_CFG_MAIN);
+	writel(0x1, comp->regs + DISP_COLOR_START(color));
+}
+
+static const struct mtk_ddp_comp_funcs mtk_disp_color_funcs = {
+	.config = mtk_color_config,
+	.start = mtk_color_start,
+};
+
+static int mtk_disp_color_bind(struct device *dev, struct device *master,
+			       void *data)
+{
+	struct mtk_disp_color *priv = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+	int ret;
+
+	ret = mtk_ddp_comp_register(drm_dev, &priv->ddp_comp);
+	if (ret < 0) {
+		dev_err(dev, "Failed to register component %s: %d\n",
+			dev->of_node->full_name, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void mtk_disp_color_unbind(struct device *dev, struct device *master,
+				  void *data)
+{
+	struct mtk_disp_color *priv = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+
+	mtk_ddp_comp_unregister(drm_dev, &priv->ddp_comp);
+}
+
+static const struct component_ops mtk_disp_color_component_ops = {
+	.bind	= mtk_disp_color_bind,
+	.unbind = mtk_disp_color_unbind,
+};
+
+static int mtk_disp_color_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct mtk_disp_color *priv;
+	int comp_id;
+	int ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	comp_id = mtk_ddp_comp_get_id(dev->of_node, MTK_DISP_COLOR);
+	if (comp_id < 0) {
+		dev_err(dev, "Failed to identify by alias: %d\n", comp_id);
+		return comp_id;
+	}
+
+	ret = mtk_ddp_comp_init(dev, dev->of_node, &priv->ddp_comp, comp_id,
+				&mtk_disp_color_funcs);
+	if (ret) {
+		dev_err(dev, "Failed to initialize component: %d\n", ret);
+		return ret;
+	}
+
+	priv->data = of_device_get_match_data(dev);
+
+	platform_set_drvdata(pdev, priv);
+
+	ret = component_add(dev, &mtk_disp_color_component_ops);
+	if (ret)
+		dev_err(dev, "Failed to add component: %d\n", ret);
+
+	return ret;
+}
+
+static int mtk_disp_color_remove(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &mtk_disp_color_component_ops);
+
+	return 0;
+}
+
+static const struct mtk_disp_color_data mt2701_color_driver_data = {
+	.color_offset = DISP_COLOR_START_MT2701,
+};
+
+static const struct mtk_disp_color_data mt8173_color_driver_data = {
+	.color_offset = DISP_COLOR_START_MT8173,
+};
+
+static const struct of_device_id mtk_disp_color_driver_dt_match[] = {
+	{ .compatible = "mediatek,mt2701-disp-color",
+	  .data = &mt2701_color_driver_data},
+	{ .compatible = "mediatek,mt8173-disp-color",
+	  .data = &mt8173_color_driver_data},
+	{},
+};
+MODULE_DEVICE_TABLE(of, mtk_disp_color_driver_dt_match);
+
+struct platform_driver mtk_disp_color_driver = {
+	.probe		= mtk_disp_color_probe,
+	.remove		= mtk_disp_color_remove,
+	.driver		= {
+		.name	= "mediatek-disp-color",
+		.owner	= THIS_MODULE,
+		.of_match_table = mtk_disp_color_driver_dt_match,
+	},
+};

diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
index a14d7d6..35bc5ba 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c

@@ -42,9 +42,12 @@
 #define	OVL_RDMA_MEM_GMC	0x40402020
 
 #define OVL_CON_BYTE_SWAP	BIT(24)
+#define OVL_CON_MTX_YUV_TO_RGB	(6 << 16)
 #define OVL_CON_CLRFMT_RGB	(1 << 12)
 #define OVL_CON_CLRFMT_RGBA8888	(2 << 12)
 #define OVL_CON_CLRFMT_ARGB8888	(3 << 12)
+#define OVL_CON_CLRFMT_UYVY	(4 << 12)
+#define OVL_CON_CLRFMT_YUYV	(5 << 12)
 #define OVL_CON_CLRFMT_RGB565(ovl)	((ovl)->data->fmt_rgb565_is_0 ? \
 					0 : OVL_CON_CLRFMT_RGB)
 #define OVL_CON_CLRFMT_RGB888(ovl)	((ovl)->data->fmt_rgb565_is_0 ? \
@@ -176,6 +179,10 @@ static unsigned int ovl_fmt_convert(struct mtk_disp_ovl *ovl, unsigned int fmt)
 	case DRM_FORMAT_XBGR8888:
 	case DRM_FORMAT_ABGR8888:
 		return OVL_CON_CLRFMT_RGBA8888 | OVL_CON_BYTE_SWAP;
+	case DRM_FORMAT_UYVY:
+		return OVL_CON_CLRFMT_UYVY | OVL_CON_MTX_YUV_TO_RGB;
+	case DRM_FORMAT_YUYV:
+		return OVL_CON_CLRFMT_YUYV | OVL_CON_MTX_YUV_TO_RGB;
 	}
 }
 

diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index 6582e1f..cb32c93 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c

@@ -559,6 +559,8 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
 	mtk_crtc->ddp_comp = devm_kmalloc_array(dev, mtk_crtc->ddp_comp_nr,
 						sizeof(*mtk_crtc->ddp_comp),
 						GFP_KERNEL);
+	if (!mtk_crtc->ddp_comp)
+		return -ENOMEM;
 
 	mtk_crtc->mutex = mtk_disp_mutex_get(priv->mutex_dev, pipe);
 	if (IS_ERR(mtk_crtc->mutex)) {

diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
index 8b52416..07d7ea2 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c

@@ -38,13 +38,6 @@
 
 #define DISP_REG_UFO_START			0x0000
 
-#define DISP_COLOR_CFG_MAIN			0x0400
-#define DISP_COLOR_START_MT2701			0x0f00
-#define DISP_COLOR_START_MT8173			0x0c00
-#define DISP_COLOR_START(comp)			((comp)->data->color_offset)
-#define DISP_COLOR_WIDTH(comp)			(DISP_COLOR_START(comp) + 0x50)
-#define DISP_COLOR_HEIGHT(comp)			(DISP_COLOR_START(comp) + 0x54)
-
 #define DISP_AAL_EN				0x0000
 #define DISP_AAL_SIZE				0x0030
 
@@ -55,9 +48,6 @@
 
 #define LUT_10BIT_MASK				0x03ff
 
-#define COLOR_BYPASS_ALL			BIT(7)
-#define COLOR_SEQ_SEL				BIT(13)
-
 #define OD_RELAYMODE				BIT(0)
 
 #define UFO_BYPASS				BIT(2)
@@ -82,20 +72,6 @@
 #define DITHER_ADD_LSHIFT_G(x)			(((x) & 0x7) << 4)
 #define DITHER_ADD_RSHIFT_G(x)			(((x) & 0x7) << 0)
 
-struct mtk_disp_color_data {
-	unsigned int color_offset;
-};
-
-struct mtk_disp_color {
-	struct mtk_ddp_comp			ddp_comp;
-	const struct mtk_disp_color_data	*data;
-};
-
-static inline struct mtk_disp_color *comp_to_color(struct mtk_ddp_comp *comp)
-{
-	return container_of(comp, struct mtk_disp_color, ddp_comp);
-}
-
 void mtk_dither_set(struct mtk_ddp_comp *comp, unsigned int bpc,
 		    unsigned int CFG)
 {
@@ -119,25 +95,6 @@ void mtk_dither_set(struct mtk_ddp_comp *comp, unsigned int bpc,
 	}
 }
 
-static void mtk_color_config(struct mtk_ddp_comp *comp, unsigned int w,
-			     unsigned int h, unsigned int vrefresh,
-			     unsigned int bpc)
-{
-	struct mtk_disp_color *color = comp_to_color(comp);
-
-	writel(w, comp->regs + DISP_COLOR_WIDTH(color));
-	writel(h, comp->regs + DISP_COLOR_HEIGHT(color));
-}
-
-static void mtk_color_start(struct mtk_ddp_comp *comp)
-{
-	struct mtk_disp_color *color = comp_to_color(comp);
-
-	writel(COLOR_BYPASS_ALL | COLOR_SEQ_SEL,
-	       comp->regs + DISP_COLOR_CFG_MAIN);
-	writel(0x1, comp->regs + DISP_COLOR_START(color));
-}
-
 static void mtk_od_config(struct mtk_ddp_comp *comp, unsigned int w,
 			  unsigned int h, unsigned int vrefresh,
 			  unsigned int bpc)
@@ -229,11 +186,6 @@ static const struct mtk_ddp_comp_funcs ddp_gamma = {
 	.stop = mtk_gamma_stop,
 };
 
-static const struct mtk_ddp_comp_funcs ddp_color = {
-	.config = mtk_color_config,
-	.start = mtk_color_start,
-};
-
 static const struct mtk_ddp_comp_funcs ddp_od = {
 	.config = mtk_od_config,
 	.start = mtk_od_start,
@@ -268,8 +220,8 @@ struct mtk_ddp_comp_match {
 static const struct mtk_ddp_comp_match mtk_ddp_matches[DDP_COMPONENT_ID_MAX] = {
 	[DDP_COMPONENT_AAL]	= { MTK_DISP_AAL,	0, &ddp_aal },
 	[DDP_COMPONENT_BLS]	= { MTK_DISP_BLS,	0, NULL },
-	[DDP_COMPONENT_COLOR0]	= { MTK_DISP_COLOR,	0, &ddp_color },
-	[DDP_COMPONENT_COLOR1]	= { MTK_DISP_COLOR,	1, &ddp_color },
+	[DDP_COMPONENT_COLOR0]	= { MTK_DISP_COLOR,	0, NULL },
+	[DDP_COMPONENT_COLOR1]	= { MTK_DISP_COLOR,	1, NULL },
 	[DDP_COMPONENT_DPI0]	= { MTK_DPI,		0, NULL },
 	[DDP_COMPONENT_DSI0]	= { MTK_DSI,		0, NULL },
 	[DDP_COMPONENT_DSI1]	= { MTK_DSI,		1, NULL },
@@ -286,22 +238,6 @@ static const struct mtk_ddp_comp_match mtk_ddp_matches[DDP_COMPONENT_ID_MAX] = {
 	[DDP_COMPONENT_WDMA1]	= { MTK_DISP_WDMA,	1, NULL },
 };
 
-static const struct mtk_disp_color_data mt2701_color_driver_data = {
-	.color_offset = DISP_COLOR_START_MT2701,
-};
-
-static const struct mtk_disp_color_data mt8173_color_driver_data = {
-	.color_offset = DISP_COLOR_START_MT8173,
-};
-
-static const struct of_device_id mtk_disp_color_driver_dt_match[] = {
-	{ .compatible = "mediatek,mt2701-disp-color",
-	  .data = &mt2701_color_driver_data},
-	{ .compatible = "mediatek,mt8173-disp-color",
-	  .data = &mt8173_color_driver_data},
-	{},
-};
-
 int mtk_ddp_comp_get_id(struct device_node *node,
 			enum mtk_ddp_comp_type comp_type)
 {
@@ -324,23 +260,11 @@ int mtk_ddp_comp_init(struct device *dev, struct device_node *node,
 	enum mtk_ddp_comp_type type;
 	struct device_node *larb_node;
 	struct platform_device *larb_pdev;
-	const struct of_device_id *match;
-	struct mtk_disp_color *color;
 
 	if (comp_id < 0 || comp_id >= DDP_COMPONENT_ID_MAX)
 		return -EINVAL;
 
 	type = mtk_ddp_matches[comp_id].type;
-	if (type == MTK_DISP_COLOR) {
-		devm_kfree(dev, comp);
-		color = devm_kzalloc(dev, sizeof(*color), GFP_KERNEL);
-		if (!color)
-			return -ENOMEM;
-
-		match = of_match_node(mtk_disp_color_driver_dt_match, node);
-		color->data = match->data;
-		comp = &color->ddp_comp;
-	}
 
 	comp->id = comp_id;
 	comp->funcs = funcs ?: mtk_ddp_matches[comp_id].funcs;

diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index f6c8ec4..41d2cff 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c

@@ -439,11 +439,12 @@ static int mtk_drm_probe(struct platform_device *pdev)
 		private->comp_node[comp_id] = of_node_get(node);
 
 		/*
-		 * Currently only the OVL, RDMA, DSI, and DPI blocks have
+		 * Currently only the COLOR, OVL, RDMA, DSI, and DPI blocks have
 		 * separate component platform drivers and initialize their own
 		 * DDP component structure. The others are initialized here.
 		 */
-		if (comp_type == MTK_DISP_OVL ||
+		if (comp_type == MTK_DISP_COLOR ||
+		    comp_type == MTK_DISP_OVL ||
 		    comp_type == MTK_DISP_RDMA ||
 		    comp_type == MTK_DSI ||
 		    comp_type == MTK_DPI) {
@@ -566,6 +567,7 @@ static struct platform_driver mtk_drm_platform_driver = {
 
 static struct platform_driver * const mtk_drm_drivers[] = {
 	&mtk_ddp_driver,
+	&mtk_disp_color_driver,
 	&mtk_disp_ovl_driver,
 	&mtk_disp_rdma_driver,
 	&mtk_dpi_driver,
@@ -576,33 +578,14 @@ static struct platform_driver * const mtk_drm_drivers[] = {
 
 static int __init mtk_drm_init(void)
 {
-	int ret;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(mtk_drm_drivers); i++) {
-		ret = platform_driver_register(mtk_drm_drivers[i]);
-		if (ret < 0) {
-			pr_err("Failed to register %s driver: %d\n",
-			       mtk_drm_drivers[i]->driver.name, ret);
-			goto err;
-		}
-	}
-
-	return 0;
-
-err:
-	while (--i >= 0)
-		platform_driver_unregister(mtk_drm_drivers[i]);
-
-	return ret;
+	return platform_register_drivers(mtk_drm_drivers,
+					 ARRAY_SIZE(mtk_drm_drivers));
 }
 
 static void __exit mtk_drm_exit(void)
 {
-	int i;
-
-	for (i = ARRAY_SIZE(mtk_drm_drivers) - 1; i >= 0; i--)
-		platform_driver_unregister(mtk_drm_drivers[i]);
+	platform_unregister_drivers(mtk_drm_drivers,
+				    ARRAY_SIZE(mtk_drm_drivers));
 }
 
 module_init(mtk_drm_init);

diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.h b/drivers/gpu/drm/mediatek/mtk_drm_drv.h
index aef8747..c3378c4 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.h

@@ -59,6 +59,7 @@ struct mtk_drm_private {
 };
 
 extern struct platform_driver mtk_ddp_driver;
+extern struct platform_driver mtk_disp_color_driver;
 extern struct platform_driver mtk_disp_ovl_driver;
 extern struct platform_driver mtk_disp_rdma_driver;
 extern struct platform_driver mtk_dpi_driver;

diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
index e405e89..1a59b9a 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c

@@ -28,6 +28,8 @@ static const u32 formats[] = {
 	DRM_FORMAT_XRGB8888,
 	DRM_FORMAT_ARGB8888,
 	DRM_FORMAT_RGB565,
+	DRM_FORMAT_UYVY,
+	DRM_FORMAT_YUYV,
 };
 
 static void mtk_plane_reset(struct drm_plane *plane)

diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index b5cc6e1..97253c8 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c

@@ -930,7 +930,7 @@ static u32 mtk_dsi_recv_cnt(u8 type, u8 *read_data)
 		DRM_INFO("type is 0x02, try again\n");
 		break;
 	default:
-		DRM_INFO("type(0x%x) cannot be non-recognite\n", type);
+		DRM_INFO("type(0x%x) not recognized\n", type);
 		break;
 	}
 

diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c
index 0a4ffd7..71eb4fb 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c

@@ -1778,33 +1778,14 @@ static struct platform_driver * const mtk_hdmi_drivers[] = {
 
 static int __init mtk_hdmitx_init(void)
 {
-	int ret;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(mtk_hdmi_drivers); i++) {
-		ret = platform_driver_register(mtk_hdmi_drivers[i]);
-		if (ret < 0) {
-			pr_err("Failed to register %s driver: %d\n",
-			       mtk_hdmi_drivers[i]->driver.name, ret);
-			goto err;
-		}
-	}
-
-	return 0;
-
-err:
-	while (--i >= 0)
-		platform_driver_unregister(mtk_hdmi_drivers[i]);
-
-	return ret;
+	return platform_register_drivers(mtk_hdmi_drivers,
+					 ARRAY_SIZE(mtk_hdmi_drivers));
 }
 
 static void __exit mtk_hdmitx_exit(void)
 {
-	int i;
-
-	for (i = ARRAY_SIZE(mtk_hdmi_drivers) - 1; i >= 0; i--)
-		platform_driver_unregister(mtk_hdmi_drivers[i]);
+	platform_unregister_drivers(mtk_hdmi_drivers,
+				    ARRAY_SIZE(mtk_hdmi_drivers));
 }
 
 module_init(mtk_hdmitx_init);

diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index fa4f8f0..e67ed38 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c

@@ -31,6 +31,7 @@
 #include "radeon_asic.h"
 #include "atom.h"
 #include <linux/backlight.h>
+#include <linux/dmi.h>
 
 extern int atom_debug;
 
@@ -2184,9 +2185,17 @@ int radeon_atom_pick_dig_encoder(struct drm_encoder *encoder, int fe_idx)
 		goto assigned;
 	}
 
-	/* on DCE32 and encoder can driver any block so just crtc id */
+	/*
+	 * On DCE32 any encoder can drive any block so usually just use crtc id,
+	 * but Apple thinks different at least on iMac10,1, so there use linkb,
+	 * otherwise the internal eDP panel will stay dark.
+	 */
 	if (ASIC_IS_DCE32(rdev)) {
-		enc_idx = radeon_crtc->crtc_id;
+		if (dmi_match(DMI_PRODUCT_NAME, "iMac10,1"))
+			enc_idx = (dig->linkb) ? 1 : 0;
+		else
+			enc_idx = radeon_crtc->crtc_id;
+
 		goto assigned;
 	}
 

diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c
index 14fa1f8..9b0b058 100644
--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c
+++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c

@@ -1195,7 +1195,7 @@ static int cdn_dp_probe(struct platform_device *pdev)
 			continue;
 
 		port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
-		if (!dp)
+		if (!port)
 			return -ENOMEM;
 
 		port->extcon = extcon;

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
index 47905fa..c7e96b8 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h

@@ -45,13 +45,13 @@ struct rockchip_crtc_state {
  *
  * @crtc: array of enabled CRTCs, used to map from "pipe" to drm_crtc.
  * @num_pipe: number of pipes for this device.
+ * @mm_lock: protects drm_mm against concurrent access from multiple threads.
  */
 struct rockchip_drm_private {
 	struct drm_fb_helper fbdev_helper;
 	struct drm_gem_object *fbdev_bo;
 	struct drm_atomic_state *state;
 	struct iommu_domain *domain;
-	/* protect drm_mm on multi-threads */
 	struct mutex mm_lock;
 	struct drm_mm mm;
 	struct list_head psr_list;

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
index df9e570..b74ac71 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c

@@ -29,12 +29,11 @@ static int rockchip_gem_iommu_map(struct rockchip_gem_object *rk_obj)
 	ssize_t ret;
 
 	mutex_lock(&private->mm_lock);
-
 	ret = drm_mm_insert_node_generic(&private->mm, &rk_obj->mm,
 					 rk_obj->base.size, PAGE_SIZE,
 					 0, 0);
-
 	mutex_unlock(&private->mm_lock);
+
 	if (ret < 0) {
 		DRM_ERROR("out of I/O virtual memory: %zd\n", ret);
 		return ret;
@@ -56,7 +55,9 @@ static int rockchip_gem_iommu_map(struct rockchip_gem_object *rk_obj)
 	return 0;
 
 err_remove_node:
+	mutex_lock(&private->mm_lock);
 	drm_mm_remove_node(&rk_obj->mm);
+	mutex_unlock(&private->mm_lock);
 
 	return ret;
 }

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 687705c..3cd60f4 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig

@@ -388,6 +388,13 @@
 	To compile this driver as a module, choose M here: the
 	module will be called hid-icade.
 
+config HID_ITE
+	tristate "ITE devices"
+	depends on HID
+	default !EXPERT
+	---help---
+	Support for ITE devices not fully compliant with the HID standard.
+
 config HID_TWINHAN
 	tristate "Twinhan IR remote control"
 	depends on HID
@@ -741,6 +748,14 @@
 	Support for Primax devices that are not fully compliant with the
 	HID standard.
 
+config HID_RETRODE
+	tristate "Retrode"
+	depends on USB_HID
+	---help---
+	Support for
+
+	  * Retrode 2 cartridge and controller adapter
+
 config HID_ROCCAT
 	tristate "Roccat device support"
 	depends on USB_HID

diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index fef027b..8659d7e 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile

@@ -50,6 +50,7 @@
 obj-$(CONFIG_HID_HOLTEK)	+= hid-holtekff.o
 obj-$(CONFIG_HID_HYPERV_MOUSE)	+= hid-hyperv.o
 obj-$(CONFIG_HID_ICADE)		+= hid-icade.o
+obj-$(CONFIG_HID_ITE)		+= hid-ite.o
 obj-$(CONFIG_HID_KENSINGTON)	+= hid-kensington.o
 obj-$(CONFIG_HID_KEYTOUCH)	+= hid-keytouch.o
 obj-$(CONFIG_HID_KYE)		+= hid-kye.o
@@ -81,6 +82,7 @@
 
 obj-$(CONFIG_HID_PLANTRONICS)	+= hid-plantronics.o
 obj-$(CONFIG_HID_PRIMAX)	+= hid-primax.o
+obj-$(CONFIG_HID_RETRODE)	+= hid-retrode.o
 obj-$(CONFIG_HID_ROCCAT)	+= hid-roccat.o hid-roccat-common.o \
 	hid-roccat-arvo.o hid-roccat-isku.o hid-roccat-kone.o \
 	hid-roccat-koneplus.o hid-roccat-konepure.o hid-roccat-kovaplus.o \

diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c
index 2e04608..25b7bd5 100644
--- a/drivers/hid/hid-apple.c
+++ b/drivers/hid/hid-apple.c

@@ -28,7 +28,7 @@
 #define APPLE_IGNORE_MOUSE	0x0002
 #define APPLE_HAS_FN		0x0004
 #define APPLE_HIDDEV		0x0008
-#define APPLE_ISO_KEYBOARD	0x0010
+/* 0x0010 reserved, was: APPLE_ISO_KEYBOARD */
 #define APPLE_MIGHTYMOUSE	0x0020
 #define APPLE_INVERT_HWHEEL	0x0040
 #define APPLE_IGNORE_HIDINPUT	0x0080
@@ -36,6 +36,8 @@
 
 #define APPLE_FLAG_FKEY		0x01
 
+#define HID_COUNTRY_INTERNATIONAL_ISO	13
+
 static unsigned int fnmode = 1;
 module_param(fnmode, uint, 0644);
 MODULE_PARM_DESC(fnmode, "Mode of fn key on Apple keyboards (0 = disabled, "
@@ -247,7 +249,7 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input,
 	}
 
 	if (iso_layout) {
-		if (asc->quirks & APPLE_ISO_KEYBOARD) {
+		if (hid->country == HID_COUNTRY_INTERNATIONAL_ISO) {
 			trans = apple_find_translation(apple_iso_keyboard, usage->code);
 			if (trans) {
 				input_event(input, usage->type, trans->to, value);
@@ -412,60 +414,54 @@ static const struct hid_device_id apple_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ANSI),
 		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_ISO),
-		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
-			APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER_JIS),
 		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ANSI),
 		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_ISO),
-		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
-			APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER3_JIS),
 		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
 			APPLE_RDESC_JIS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ANSI),
 		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_ISO),
-		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
-			APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_JIS),
 		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
 			APPLE_RDESC_JIS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_MINI_ANSI),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_MINI_ISO),
-		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_MINI_JIS),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ANSI),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_ISO),
-		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_JIS),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ANSI),
 		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_ISO),
-		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
-			APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS),
 		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
 			APPLE_RDESC_JIS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO),
-		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_JIS),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI),
 		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO),
-		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
-			APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO),
-		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
-			APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE,
 				USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI),
 		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
@@ -479,86 +475,85 @@ static const struct hid_device_id apple_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ANSI),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_ISO),
-		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING_JIS),
 		.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ANSI),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_ISO),
-		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING2_JIS),
 		.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_ANSI),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_ISO),
-		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING3_JIS),
 		.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4_ANSI),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4_ISO),
-		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4_JIS),
 		.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4A_ANSI),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4A_ISO),
-		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING4A_JIS),
 		.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ISO),
-		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_JIS),
 		.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_ISO),
-		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6_JIS),
 		.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO),
-		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS),
 		.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO),
-		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS),
 		.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_ANSI),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_ISO),
-		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7_JIS),
 		.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_ANSI),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_ISO),
-		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING7A_JIS),
 		.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ANSI),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_ISO),
-		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING8_JIS),
 		.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI),
 		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ISO),
-		.driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_JIS),
 		.driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI),
 		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO),
-		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
-			APPLE_ISO_KEYBOARD },
+		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS),
 		.driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY),

diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index a6268f2..a4a3c38 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c

@@ -422,6 +422,33 @@ static int asus_input_mapping(struct hid_device *hdev,
 		return 1;
 	}
 
+	if ((usage->hid & HID_USAGE_PAGE) == HID_UP_MSVENDOR) {
+		set_bit(EV_REP, hi->input->evbit);
+		switch (usage->hid & HID_USAGE) {
+		case 0xff01: asus_map_key_clear(BTN_1);	break;
+		case 0xff02: asus_map_key_clear(BTN_2);	break;
+		case 0xff03: asus_map_key_clear(BTN_3);	break;
+		case 0xff04: asus_map_key_clear(BTN_4);	break;
+		case 0xff05: asus_map_key_clear(BTN_5);	break;
+		case 0xff06: asus_map_key_clear(BTN_6);	break;
+		case 0xff07: asus_map_key_clear(BTN_7);	break;
+		case 0xff08: asus_map_key_clear(BTN_8);	break;
+		case 0xff09: asus_map_key_clear(BTN_9);	break;
+		case 0xff0a: asus_map_key_clear(BTN_A);	break;
+		case 0xff0b: asus_map_key_clear(BTN_B);	break;
+		case 0x00f1: asus_map_key_clear(KEY_WLAN);	break;
+		case 0x00f2: asus_map_key_clear(KEY_BRIGHTNESSDOWN);	break;
+		case 0x00f3: asus_map_key_clear(KEY_BRIGHTNESSUP);	break;
+		case 0x00f4: asus_map_key_clear(KEY_DISPLAY_OFF);	break;
+		case 0x00f7: asus_map_key_clear(KEY_CAMERA);	break;
+		case 0x00f8: asus_map_key_clear(KEY_PROG1);	break;
+		default:
+			return 0;
+		}
+
+		return 1;
+	}
+
 	if (drvdata->quirks & QUIRK_NO_CONSUMER_USAGES &&
 		(usage->hid & HID_USAGE_PAGE) == HID_UP_CONSUMER) {
 		switch (usage->hid & HID_USAGE) {
@@ -572,6 +599,9 @@ static const struct hid_device_id asus_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
 		USB_DEVICE_ID_ASUSTEK_T100_KEYBOARD),
 	  QUIRK_T100_KEYBOARD | QUIRK_NO_CONSUMER_USAGES },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_ASUS_AK1D) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_ASUS_MD_5110) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_ASUS_MD_5112) },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, asus_devices);

diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c
index f04ed9a..397a789 100644
--- a/drivers/hid/hid-chicony.c
+++ b/drivers/hid/hid-chicony.c

@@ -84,9 +84,7 @@ static __u8 *ch_switch12_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 static const struct hid_device_id ch_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_TACTICAL_PAD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, ch_devices);

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 2241e79..6fd01a6 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c

@@ -1070,7 +1070,7 @@ static s32 snto32(__u32 value, unsigned n)
 	case 16: return ((__s16)value);
 	case 32: return ((__s32)value);
 	}
-	return value & (1 << (n - 1)) ? value | (-1 << n) : value;
+	return value & (1 << (n - 1)) ? value | (~0U << n) : value;
 }
 
 s32 hid_snto32(__u32 value, unsigned n)
@@ -1774,6 +1774,94 @@ void hid_disconnect(struct hid_device *hdev)
 }
 EXPORT_SYMBOL_GPL(hid_disconnect);
 
+/**
+ * hid_hw_start - start underlying HW
+ * @hdev: hid device
+ * @connect_mask: which outputs to connect, see HID_CONNECT_*
+ *
+ * Call this in probe function *after* hid_parse. This will setup HW
+ * buffers and start the device (if not deferred to device open).
+ * hid_hw_stop must be called if this was successful.
+ */
+int hid_hw_start(struct hid_device *hdev, unsigned int connect_mask)
+{
+	int error;
+
+	error = hdev->ll_driver->start(hdev);
+	if (error)
+		return error;
+
+	if (connect_mask) {
+		error = hid_connect(hdev, connect_mask);
+		if (error) {
+			hdev->ll_driver->stop(hdev);
+			return error;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hid_hw_start);
+
+/**
+ * hid_hw_stop - stop underlying HW
+ * @hdev: hid device
+ *
+ * This is usually called from remove function or from probe when something
+ * failed and hid_hw_start was called already.
+ */
+void hid_hw_stop(struct hid_device *hdev)
+{
+	hid_disconnect(hdev);
+	hdev->ll_driver->stop(hdev);
+}
+EXPORT_SYMBOL_GPL(hid_hw_stop);
+
+/**
+ * hid_hw_open - signal underlying HW to start delivering events
+ * @hdev: hid device
+ *
+ * Tell underlying HW to start delivering events from the device.
+ * This function should be called sometime after successful call
+ * to hid_hw_start().
+ */
+int hid_hw_open(struct hid_device *hdev)
+{
+	int ret;
+
+	ret = mutex_lock_killable(&hdev->ll_open_lock);
+	if (ret)
+		return ret;
+
+	if (!hdev->ll_open_count++) {
+		ret = hdev->ll_driver->open(hdev);
+		if (ret)
+			hdev->ll_open_count--;
+	}
+
+	mutex_unlock(&hdev->ll_open_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(hid_hw_open);
+
+/**
+ * hid_hw_close - signal underlying HW to stop delivering events
+ *
+ * @hdev: hid device
+ *
+ * This function indicates that we are not interested in the events
+ * from this device anymore. Delivery of events may or may not stop,
+ * depending on the number of users still outstanding.
+ */
+void hid_hw_close(struct hid_device *hdev)
+{
+	mutex_lock(&hdev->ll_open_lock);
+	if (!--hdev->ll_open_count)
+		hdev->ll_driver->close(hdev);
+	mutex_unlock(&hdev->ll_open_lock);
+}
+EXPORT_SYMBOL_GPL(hid_hw_close);
+
 /*
  * A list of devices for which there is a specialized driver on HID bus.
  *
@@ -1892,6 +1980,8 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD2) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T100_KEYBOARD) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_ASUS_MD_5112) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_ASUS_MD_5110) },
 #endif
 #if IS_ENABLED(CONFIG_HID_AUREAL)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_AUREAL, USB_DEVICE_ID_AUREAL_W01RN) },
@@ -1913,9 +2003,8 @@ static const struct hid_device_id hid_have_special_driver[] = {
 #if IS_ENABLED(CONFIG_HID_CHICONY)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_TACTICAL_PAD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_ASUS_AK1D) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) },
 #endif
 #if IS_ENABLED(CONFIG_HID_CMEDIA)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CMEDIA, USB_DEVICE_ID_CM6533) },
@@ -1984,6 +2073,9 @@ static const struct hid_device_id hid_have_special_driver[] = {
 #if IS_ENABLED(CONFIG_HID_ICADE)
 	{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) },
 #endif
+#if IS_ENABLED(CONFIG_HID_ITE)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
+#endif
 #if IS_ENABLED(CONFIG_HID_KENSINGTON)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) },
 #endif
@@ -2151,6 +2243,9 @@ static const struct hid_device_id hid_have_special_driver[] = {
 #if IS_ENABLED(CONFIG_HID_PRODIKEYS)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) },
 #endif
+#if IS_ENABLED(CONFIG_HID_RETRODE)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_FUTURE_TECHNOLOGY, USB_DEVICE_ID_RETRODE2) },
+#endif
 #if IS_ENABLED(CONFIG_HID_RMI)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_COVER) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_RAZER, USB_DEVICE_ID_RAZER_BLADE_14) },
@@ -2914,6 +3009,7 @@ struct hid_device *hid_allocate_device(void)
 	spin_lock_init(&hdev->debug_list_lock);
 	sema_init(&hdev->driver_lock, 1);
 	sema_init(&hdev->driver_input_lock, 1);
+	mutex_init(&hdev->ll_open_lock);
 
 	return hdev;
 }

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 4f9a393..3d911bf 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h

@@ -75,6 +75,8 @@
 
 #define USB_VENDOR_ID_ALPS_JP		0x044E
 #define HID_DEVICE_ID_ALPS_U1_DUAL	0x120B
+#define HID_DEVICE_ID_ALPS_U1_DUAL_PTP	0x121F
+#define HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP	0x1220
 
 #define USB_VENDOR_ID_AMI		0x046b
 #define USB_DEVICE_ID_AMI_VIRT_KEYBOARD_AND_MOUSE	0xff10
@@ -252,7 +254,7 @@
 #define USB_DEVICE_ID_CHICONY_WIRELESS	0x0618
 #define USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE	0x1053
 #define USB_DEVICE_ID_CHICONY_WIRELESS2	0x1123
-#define USB_DEVICE_ID_CHICONY_AK1D	0x1125
+#define USB_DEVICE_ID_ASUS_AK1D		0x1125
 #define USB_DEVICE_ID_CHICONY_ACER_SWITCH12	0x1421
 
 #define USB_VENDOR_ID_CHUNGHWAT		0x2247
@@ -386,6 +388,9 @@
 #define USB_VENDOR_ID_FUTABA            0x0547
 #define USB_DEVICE_ID_LED_DISPLAY       0x7000
 
+#define USB_VENDOR_ID_FUTURE_TECHNOLOGY	0x0403
+#define USB_DEVICE_ID_RETRODE2		0x97c1
+
 #define USB_VENDOR_ID_ESSENTIAL_REALITY	0x0d7f
 #define USB_DEVICE_ID_ESSENTIAL_REALITY_P5 0x0100
 
@@ -428,6 +433,9 @@
 #define USB_VENDOR_ID_GOODTOUCH		0x1aad
 #define USB_DEVICE_ID_GOODTOUCH_000f	0x000f
 
+#define USB_VENDOR_ID_GOOGLE		0x18d1
+#define USB_DEVICE_ID_GOOGLE_TOUCH_ROSE	0x5028
+
 #define USB_VENDOR_ID_GOTOP		0x08f2
 #define USB_DEVICE_ID_SUPER_Q2		0x007f
 #define USB_DEVICE_ID_GOGOPEN		0x00ce
@@ -565,6 +573,7 @@
 #define USB_DEVICE_ID_ITE_LENOVO_YOGA   0x8386
 #define USB_DEVICE_ID_ITE_LENOVO_YOGA2  0x8350
 #define USB_DEVICE_ID_ITE_LENOVO_YOGA900	0x8396
+#define USB_DEVICE_ID_ITE8595		0x8595
 
 #define USB_VENDOR_ID_JABRA		0x0b0e
 #define USB_DEVICE_ID_JABRA_SPEAK_410	0x0412
@@ -573,7 +582,7 @@
 
 #define USB_VENDOR_ID_JESS		0x0c45
 #define USB_DEVICE_ID_JESS_YUREX	0x1010
-#define USB_DEVICE_ID_JESS_ZEN_AIO_KBD	0x5112
+#define USB_DEVICE_ID_ASUS_MD_5112	0x5112
 
 #define USB_VENDOR_ID_JESS2		0x0f30
 #define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD 0x0111
@@ -1024,6 +1033,7 @@
 
 #define USB_VENDOR_ID_TURBOX		0x062a
 #define USB_DEVICE_ID_TURBOX_KEYBOARD	0x0201
+#define USB_DEVICE_ID_ASUS_MD_5110	0x5110
 #define USB_DEVICE_ID_TURBOX_TOUCHSCREEN_MOSART	0x7100
 
 #define USB_VENDOR_ID_TWINHAN		0x6253

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index a1ebdd7..ccdff1e 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c

@@ -656,6 +656,15 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 		case HID_GD_START:	map_key_clear(BTN_START);	break;
 		case HID_GD_SELECT:	map_key_clear(BTN_SELECT);	break;
 
+		case HID_GD_RFKILL_BTN:
+			/* MS wireless radio ctl extension, also check CA */
+			if (field->application == HID_GD_WIRELESS_RADIO_CTLS) {
+				map_key_clear(KEY_RFKILL);
+				/* We need to simulate the btn release */
+				field->flags |= HID_MAIN_ITEM_RELATIVE;
+				break;
+			}
+
 		default: goto unknown;
 		}
 

diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
new file mode 100644
index 0000000..1882a4a
--- /dev/null
+++ b/drivers/hid/hid-ite.c

@@ -0,0 +1,56 @@
+/*
+ * HID driver for some ITE "special" devices
+ * Copyright (c) 2017 Hans de Goede <hdegoede@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/input.h>
+#include <linux/hid.h>
+#include <linux/module.h>
+
+#include "hid-ids.h"
+
+static int ite_event(struct hid_device *hdev, struct hid_field *field,
+		     struct hid_usage *usage, __s32 value)
+{
+	struct input_dev *input;
+
+	if (!(hdev->claimed & HID_CLAIMED_INPUT) || !field->hidinput)
+		return 0;
+
+	input = field->hidinput->input;
+
+	/*
+	 * The ITE8595 always reports 0 as value for the rfkill button. Luckily
+	 * it is the only button in its report, and it sends a report on
+	 * release only, so receiving a report means the button was pressed.
+	 */
+	if (usage->hid == HID_GD_RFKILL_BTN) {
+		input_event(input, EV_KEY, KEY_RFKILL, 1);
+		input_sync(input);
+		input_event(input, EV_KEY, KEY_RFKILL, 0);
+		input_sync(input);
+		return 1;
+	}
+
+	return 0;
+}
+
+static const struct hid_device_id ite_devices[] = {
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
+	{ }
+};
+MODULE_DEVICE_TABLE(hid, ite_devices);
+
+static struct hid_driver ite_driver = {
+	.name = "itetech",
+	.id_table = ite_devices,
+	.event = ite_event,
+};
+module_hid_driver(ite_driver);
+
+MODULE_LICENSE("GPL");

diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 24d5b6d..f3e35e7 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c

@@ -44,6 +44,7 @@
 #include <linux/slab.h>
 #include <linux/input/mt.h>
 #include <linux/string.h>
+#include <linux/timer.h>
 
 
 MODULE_AUTHOR("Stephane Chatty <chatty@enac.fr>");
@@ -54,28 +55,33 @@ MODULE_LICENSE("GPL");
 #include "hid-ids.h"
 
 /* quirks to control the device */
-#define MT_QUIRK_NOT_SEEN_MEANS_UP	(1 << 0)
-#define MT_QUIRK_SLOT_IS_CONTACTID	(1 << 1)
-#define MT_QUIRK_CYPRESS		(1 << 2)
-#define MT_QUIRK_SLOT_IS_CONTACTNUMBER	(1 << 3)
-#define MT_QUIRK_ALWAYS_VALID		(1 << 4)
-#define MT_QUIRK_VALID_IS_INRANGE	(1 << 5)
-#define MT_QUIRK_VALID_IS_CONFIDENCE	(1 << 6)
-#define MT_QUIRK_CONFIDENCE		(1 << 7)
-#define MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE	(1 << 8)
-#define MT_QUIRK_NO_AREA		(1 << 9)
-#define MT_QUIRK_IGNORE_DUPLICATES	(1 << 10)
-#define MT_QUIRK_HOVERING		(1 << 11)
-#define MT_QUIRK_CONTACT_CNT_ACCURATE	(1 << 12)
-#define MT_QUIRK_FORCE_GET_FEATURE	(1 << 13)
-#define MT_QUIRK_FIX_CONST_CONTACT_ID	(1 << 14)
-#define MT_QUIRK_TOUCH_SIZE_SCALING	(1 << 15)
+#define MT_QUIRK_NOT_SEEN_MEANS_UP	BIT(0)
+#define MT_QUIRK_SLOT_IS_CONTACTID	BIT(1)
+#define MT_QUIRK_CYPRESS		BIT(2)
+#define MT_QUIRK_SLOT_IS_CONTACTNUMBER	BIT(3)
+#define MT_QUIRK_ALWAYS_VALID		BIT(4)
+#define MT_QUIRK_VALID_IS_INRANGE	BIT(5)
+#define MT_QUIRK_VALID_IS_CONFIDENCE	BIT(6)
+#define MT_QUIRK_CONFIDENCE		BIT(7)
+#define MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE	BIT(8)
+#define MT_QUIRK_NO_AREA		BIT(9)
+#define MT_QUIRK_IGNORE_DUPLICATES	BIT(10)
+#define MT_QUIRK_HOVERING		BIT(11)
+#define MT_QUIRK_CONTACT_CNT_ACCURATE	BIT(12)
+#define MT_QUIRK_FORCE_GET_FEATURE	BIT(13)
+#define MT_QUIRK_FIX_CONST_CONTACT_ID	BIT(14)
+#define MT_QUIRK_TOUCH_SIZE_SCALING	BIT(15)
+#define MT_QUIRK_STICKY_FINGERS		BIT(16)
 
 #define MT_INPUTMODE_TOUCHSCREEN	0x02
 #define MT_INPUTMODE_TOUCHPAD		0x03
 
 #define MT_BUTTONTYPE_CLICKPAD		0
 
+#define MT_IO_FLAGS_RUNNING		0
+#define MT_IO_FLAGS_ACTIVE_SLOTS	1
+#define MT_IO_FLAGS_PENDING_SLOTS	2
+
 struct mt_slot {
 	__s32 x, y, cx, cy, p, w, h;
 	__s32 contactid;	/* the device ContactID assigned to this slot */
@@ -104,8 +110,10 @@ struct mt_fields {
 struct mt_device {
 	struct mt_slot curdata;	/* placeholder of incoming data */
 	struct mt_class mtclass;	/* our mt device class */
+	struct timer_list release_timer;	/* to release sticky fingers */
 	struct mt_fields *fields;	/* temporary placeholder for storing the
 					   multitouch fields */
+	unsigned long mt_io_flags;	/* mt flags (MT_IO_FLAGS_*) */
 	int cc_index;	/* contact count field index in the report */
 	int cc_value_index;	/* contact count value index in the field */
 	unsigned last_slot_field;	/* the last field of a slot */
@@ -148,6 +156,7 @@ static void mt_post_parse(struct mt_device *td);
 /* reserved					0x0011 */
 #define MT_CLS_WIN_8				0x0012
 #define MT_CLS_EXPORT_ALL_INPUTS		0x0013
+#define MT_CLS_WIN_8_DUAL			0x0014
 
 /* vendor specific classes */
 #define MT_CLS_3M				0x0101
@@ -161,6 +170,7 @@ static void mt_post_parse(struct mt_device *td);
 #define MT_CLS_GENERALTOUCH_PWT_TENFINGERS	0x0109
 #define MT_CLS_LG				0x010a
 #define MT_CLS_VTL				0x0110
+#define MT_CLS_GOOGLE				0x0111
 
 #define MT_DEFAULT_MAXCONTACT	10
 #define MT_MAX_MAXCONTACT	250
@@ -212,11 +222,18 @@ static struct mt_class mt_classes[] = {
 		.quirks = MT_QUIRK_ALWAYS_VALID |
 			MT_QUIRK_IGNORE_DUPLICATES |
 			MT_QUIRK_HOVERING |
-			MT_QUIRK_CONTACT_CNT_ACCURATE },
+			MT_QUIRK_CONTACT_CNT_ACCURATE |
+			MT_QUIRK_STICKY_FINGERS },
 	{ .name = MT_CLS_EXPORT_ALL_INPUTS,
 		.quirks = MT_QUIRK_ALWAYS_VALID |
 			MT_QUIRK_CONTACT_CNT_ACCURATE,
 		.export_all_inputs = true },
+	{ .name = MT_CLS_WIN_8_DUAL,
+		.quirks = MT_QUIRK_ALWAYS_VALID |
+			MT_QUIRK_IGNORE_DUPLICATES |
+			MT_QUIRK_HOVERING |
+			MT_QUIRK_CONTACT_CNT_ACCURATE,
+		.export_all_inputs = true },
 
 	/*
 	 * vendor specific classes
@@ -278,6 +295,12 @@ static struct mt_class mt_classes[] = {
 			MT_QUIRK_CONTACT_CNT_ACCURATE |
 			MT_QUIRK_FORCE_GET_FEATURE,
 	},
+	{ .name = MT_CLS_GOOGLE,
+		.quirks = MT_QUIRK_ALWAYS_VALID |
+			MT_QUIRK_CONTACT_CNT_ACCURATE |
+			MT_QUIRK_SLOT_IS_CONTACTID |
+			MT_QUIRK_HOVERING
+	},
 	{ }
 };
 
@@ -512,7 +535,8 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 			mt_store_field(usage, td, hi);
 			return 1;
 		case HID_DG_CONFIDENCE:
-			if (cls->name == MT_CLS_WIN_8 &&
+			if ((cls->name == MT_CLS_WIN_8 ||
+				cls->name == MT_CLS_WIN_8_DUAL) &&
 				field->application == HID_DG_TOUCHPAD)
 				cls->quirks |= MT_QUIRK_CONFIDENCE;
 			mt_store_field(usage, td, hi);
@@ -579,7 +603,8 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 		 * MS PTP spec says that external buttons left and right have
 		 * usages 2 and 3.
 		 */
-		if (cls->name == MT_CLS_WIN_8 &&
+		if ((cls->name == MT_CLS_WIN_8 ||
+			cls->name == MT_CLS_WIN_8_DUAL) &&
 		    field->application == HID_DG_TOUCHPAD &&
 		    (usage->hid & HID_USAGE) > 1)
 			code--;
@@ -682,6 +707,8 @@ static void mt_complete_slot(struct mt_device *td, struct input_dev *input)
 			input_event(input, EV_ABS, ABS_MT_PRESSURE, s->p);
 			input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, major);
 			input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, minor);
+
+			set_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags);
 		}
 	}
 
@@ -697,6 +724,11 @@ static void mt_sync_frame(struct mt_device *td, struct input_dev *input)
 	input_mt_sync_frame(input);
 	input_sync(input);
 	td->num_received = 0;
+	if (test_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags))
+		set_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags);
+	else
+		clear_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags);
+	clear_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags);
 }
 
 static int mt_touch_event(struct hid_device *hid, struct hid_field *field,
@@ -788,6 +820,10 @@ static void mt_touch_report(struct hid_device *hid, struct hid_report *report)
 	unsigned count;
 	int r, n;
 
+	/* sticky fingers release in progress, abort */
+	if (test_and_set_bit(MT_IO_FLAGS_RUNNING, &td->mt_io_flags))
+		return;
+
 	/*
 	 * Includes multi-packet support where subsequent
 	 * packets are sent with zero contactcount.
@@ -813,6 +849,34 @@ static void mt_touch_report(struct hid_device *hid, struct hid_report *report)
 
 	if (td->num_received >= td->num_expected)
 		mt_sync_frame(td, report->field[0]->hidinput->input);
+
+	/*
+	 * The Windows 8 spec says 2 things:
+	 * - once a contact has been reported, it has to be reported in each
+	 *   subsequent report
+	 * - the report rate when fingers are present has to be at least
+	 *   the refresh rate of the screen, 60 or 120 Hz
+	 *
+	 * I interpret this as the specification forcing a report rate of
+	 * at least 60 Hz for a touchscreen to be certified.
+	 * This means that if we do not get a report within 16 ms, either
+	 * something went wrong or the touchscreen forgot to send a
+	 * release. Taking a reasonable margin avoids issues
+	 * with USB communication or the load of the machine.
+	 *
+	 * Given that Win 8 devices are forced to send a release, this will
+	 * only affect laggish machines and the ones that have a firmware
+	 * defect.
+	 */
+	if (td->mtclass.quirks & MT_QUIRK_STICKY_FINGERS) {
+		if (test_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags))
+			mod_timer(&td->release_timer,
+				  jiffies + msecs_to_jiffies(100));
+		else
+			del_timer(&td->release_timer);
+	}
+
+	clear_bit(MT_IO_FLAGS_RUNNING, &td->mt_io_flags);
 }
 
 static int mt_touch_input_configured(struct hid_device *hdev,
@@ -1124,6 +1188,47 @@ static void mt_fix_const_fields(struct hid_device *hdev, unsigned int usage)
 	}
 }
 
+static void mt_release_contacts(struct hid_device *hid)
+{
+	struct hid_input *hidinput;
+	struct mt_device *td = hid_get_drvdata(hid);
+
+	list_for_each_entry(hidinput, &hid->inputs, list) {
+		struct input_dev *input_dev = hidinput->input;
+		struct input_mt *mt = input_dev->mt;
+		int i;
+
+		if (mt) {
+			for (i = 0; i < mt->num_slots; i++) {
+				input_mt_slot(input_dev, i);
+				input_mt_report_slot_state(input_dev,
+							   MT_TOOL_FINGER,
+							   false);
+			}
+			input_mt_sync_frame(input_dev);
+			input_sync(input_dev);
+		}
+	}
+
+	td->num_received = 0;
+}
+
+static void mt_expired_timeout(unsigned long arg)
+{
+	struct hid_device *hdev = (void *)arg;
+	struct mt_device *td = hid_get_drvdata(hdev);
+
+	/*
+	 * An input report came in just before we release the sticky fingers,
+	 * it will take care of the sticky fingers.
+	 */
+	if (test_and_set_bit(MT_IO_FLAGS_RUNNING, &td->mt_io_flags))
+		return;
+	if (test_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags))
+		mt_release_contacts(hdev);
+	clear_bit(MT_IO_FLAGS_RUNNING, &td->mt_io_flags);
+}
+
 static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
 {
 	int ret, i;
@@ -1193,6 +1298,8 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
 	 */
 	hdev->quirks |= HID_QUIRK_NO_INIT_REPORTS;
 
+	setup_timer(&td->release_timer, mt_expired_timeout, (long)hdev);
+
 	ret = hid_parse(hdev);
 	if (ret != 0)
 		return ret;
@@ -1220,28 +1327,6 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
 }
 
 #ifdef CONFIG_PM
-static void mt_release_contacts(struct hid_device *hid)
-{
-	struct hid_input *hidinput;
-
-	list_for_each_entry(hidinput, &hid->inputs, list) {
-		struct input_dev *input_dev = hidinput->input;
-		struct input_mt *mt = input_dev->mt;
-		int i;
-
-		if (mt) {
-			for (i = 0; i < mt->num_slots; i++) {
-				input_mt_slot(input_dev, i);
-				input_mt_report_slot_state(input_dev,
-							   MT_TOOL_FINGER,
-							   false);
-			}
-			input_mt_sync_frame(input_dev);
-			input_sync(input_dev);
-		}
-	}
-}
-
 static int mt_reset_resume(struct hid_device *hdev)
 {
 	mt_release_contacts(hdev);
@@ -1266,6 +1351,8 @@ static void mt_remove(struct hid_device *hdev)
 {
 	struct mt_device *td = hid_get_drvdata(hdev);
 
+	del_timer_sync(&td->release_timer);
+
 	sysfs_remove_group(&hdev->dev.kobj, &mt_attribute_group);
 	hid_hw_stop(hdev);
 	hdev->quirks = td->initial_quirks;
@@ -1290,6 +1377,16 @@ static const struct hid_device_id mt_devices[] = {
 		MT_USB_DEVICE(USB_VENDOR_ID_3M,
 			USB_DEVICE_ID_3M3266) },
 
+	/* Alps devices */
+	{ .driver_data = MT_CLS_WIN_8_DUAL,
+		HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
+			USB_VENDOR_ID_ALPS_JP,
+			HID_DEVICE_ID_ALPS_U1_DUAL_PTP) },
+	{ .driver_data = MT_CLS_WIN_8_DUAL,
+		HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
+			USB_VENDOR_ID_ALPS_JP,
+			HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP) },
+
 	/* Anton devices */
 	{ .driver_data = MT_CLS_EXPORT_ALL_INPUTS,
 		MT_USB_DEVICE(USB_VENDOR_ID_ANTON,
@@ -1569,6 +1666,11 @@ static const struct hid_device_id mt_devices[] = {
 		MT_USB_DEVICE(USB_VENDOR_ID_XIROKU,
 			USB_DEVICE_ID_XIROKU_CSR2) },
 
+	/* Google MT devices */
+	{ .driver_data = MT_CLS_GOOGLE,
+		HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_GOOGLE,
+			USB_DEVICE_ID_GOOGLE_TOUCH_ROSE) },
+
 	/* Generic MT device */
 	{ HID_DEVICE(HID_BUS_ANY, HID_GROUP_MULTITOUCH, HID_ANY_ID, HID_ANY_ID) },
 

diff --git a/drivers/hid/hid-retrode.c b/drivers/hid/hid-retrode.c
new file mode 100644
index 0000000..30cc7eb
--- /dev/null
+++ b/drivers/hid/hid-retrode.c

@@ -0,0 +1,100 @@
+/*
+ *  HID driver for Retrode 2 controller adapter and plug-in extensions
+ *
+ *  Copyright (c) 2017 Bastien Nocera <hadess@hadess.net>
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/input.h>
+#include <linux/slab.h>
+#include <linux/hid.h>
+#include <linux/module.h>
+#include "hid-ids.h"
+
+#define CONTROLLER_NAME_BASE "Retrode"
+
+/*
+ * Name the input device "Retrode <type>" or "Retrode <type> #<n>"; <type>
+ * and <n> are chosen from the id of the report owning the input's first
+ * field. Returns 0 on success or -ENOMEM if the name cannot be allocated.
+ */
+static int retrode_input_configured(struct hid_device *hdev,
+					struct hid_input *hi)
+{
+	struct hid_field *first = hi->report->field[0];
+	const char *kind = "Unknown";
+	int port = 0;
+	char *label;
+
+	if (first->report->id == 0) {
+		kind = "SNES Mouse";
+	} else if (first->report->id == 1 || first->report->id == 2) {
+		kind = "SNES / N64";
+		port = first->report->id;
+	} else if (first->report->id == 3 || first->report->id == 4) {
+		kind = "Mega Drive";
+		port = first->report->id - 2;
+	} else {
+		hid_err(hdev, "Got unhandled report id %d\n", first->report->id);
+	}
+
+	/* Only the ids that set a port number get the "#<n>" suffix */
+	if (port == 0) {
+		label = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s",
+				       CONTROLLER_NAME_BASE, kind);
+	} else {
+		label = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s #%d",
+				       CONTROLLER_NAME_BASE, kind, port);
+	}
+
+	if (!label)
+		return -ENOMEM;
+
+	/* device-managed string, so there is no explicit free */
+	hi->input->name = label;
+
+	return 0;
+}
+
+/*
+ * Probe a Retrode adapter: set the multi-input quirk, parse the report
+ * descriptor and start the hardware with HID_CONNECT_DEFAULT.
+ * Returns 0 on success or the error from hid_parse() / hid_hw_start().
+ */
+static int retrode_probe(struct hid_device *hdev,
+			const struct hid_device_id *id)
+{
+	int err;
+
+	/* Has no effect on the mouse device */
+	hdev->quirks |= HID_QUIRK_MULTI_INPUT;
+
+	err = hid_parse(hdev);
+	if (!err)
+		err = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
+
+	return err;
+}
+
+static const struct hid_device_id retrode_devices[] = {
+	{ HID_USB_DEVICE(USB_VENDOR_ID_FUTURE_TECHNOLOGY, USB_DEVICE_ID_RETRODE2) },
+	{ }	/* empty entry terminates the table */
+};
+MODULE_DEVICE_TABLE(hid, retrode_devices);
+
+static struct hid_driver retrode_driver = {
+	.name             = "hid-retrode",
+	.id_table         = retrode_devices,
+	.input_configured = retrode_input_configured, /* names each input */
+	.probe            = retrode_probe,
+};
+
+module_hid_driver(retrode_driver);
+
+MODULE_LICENSE("GPL");

diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index 0401503..046f692 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c

@@ -40,7 +40,7 @@
 #include <linux/of.h>
 #include <linux/regulator/consumer.h>
 
-#include <linux/i2c/i2c-hid.h>
+#include <linux/platform_data/i2c-hid.h>
 
 #include "../hid-ids.h"
 
@@ -743,18 +743,12 @@ static int i2c_hid_open(struct hid_device *hid)
 	struct i2c_hid *ihid = i2c_get_clientdata(client);
 	int ret = 0;
 
-	mutex_lock(&i2c_hid_open_mut);
-	if (!hid->open++) {
-		ret = pm_runtime_get_sync(&client->dev);
-		if (ret < 0) {
-			hid->open--;
-			goto done;
-		}
-		set_bit(I2C_HID_STARTED, &ihid->flags);
-	}
-done:
-	mutex_unlock(&i2c_hid_open_mut);
-	return ret < 0 ? ret : 0;
+	ret = pm_runtime_get_sync(&client->dev);
+	if (ret < 0)
+		return ret;
+
+	set_bit(I2C_HID_STARTED, &ihid->flags);
+	return 0;
 }
 
 static void i2c_hid_close(struct hid_device *hid)
@@ -762,18 +756,10 @@ static void i2c_hid_close(struct hid_device *hid)
 	struct i2c_client *client = hid->driver_data;
 	struct i2c_hid *ihid = i2c_get_clientdata(client);
 
-	/* protecting hid->open to make sure we don't restart
-	 * data acquistion due to a resumption we no longer
-	 * care about
-	 */
-	mutex_lock(&i2c_hid_open_mut);
-	if (!--hid->open) {
-		clear_bit(I2C_HID_STARTED, &ihid->flags);
+	clear_bit(I2C_HID_STARTED, &ihid->flags);
 
-		/* Save some power */
-		pm_runtime_put(&client->dev);
-	}
-	mutex_unlock(&i2c_hid_open_mut);
+	/* Save some power */
+	pm_runtime_put(&client->dev);
 }
 
 static int i2c_hid_power(struct hid_device *hid, int lvl)

diff --git a/drivers/hid/intel-ish-hid/Kconfig b/drivers/hid/intel-ish-hid/Kconfig
index ea065b3..519e4c8 100644
--- a/drivers/hid/intel-ish-hid/Kconfig
+++ b/drivers/hid/intel-ish-hid/Kconfig

@@ -1,5 +1,5 @@
 menu "Intel ISH HID support"
-	depends on X86_64 && PCI
+	depends on (X86_64 || COMPILE_TEST) && PCI
 
 config INTEL_ISH_HID
 	tristate "Intel Integrated Sensor Hub"

diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h
index fd34307..2aac097c 100644
--- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h
+++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h

@@ -26,6 +26,8 @@
 #define BXT_Bx_DEVICE_ID	0x1AA2
 #define APL_Ax_DEVICE_ID	0x5AA2
 #define SPT_Ax_DEVICE_ID	0x9D35
+#define CNL_Ax_DEVICE_ID	0x9DFC
+#define GLK_Ax_DEVICE_ID	0x31A2
 
 #define	REVISION_ID_CHT_A0	0x6
 #define	REVISION_ID_CHT_Ax_SI	0x0

diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c
index 842d841..9a60ec1 100644
--- a/drivers/hid/intel-ish-hid/ipc/ipc.c
+++ b/drivers/hid/intel-ish-hid/ipc/ipc.c

@@ -296,17 +296,12 @@ static int write_ipc_from_queue(struct ishtp_device *dev)
 	/* If sending MNG_SYNC_FW_CLOCK, update clock again */
 	if (IPC_HEADER_GET_PROTOCOL(doorbell_val) == IPC_PROTOCOL_MNG &&
 		IPC_HEADER_GET_MNG_CMD(doorbell_val) == MNG_SYNC_FW_CLOCK) {
-		struct timespec ts_system;
-		struct timeval tv_utc;
-		uint64_t        usec_system, usec_utc;
+		uint64_t usec_system, usec_utc;
 		struct ipc_time_update_msg time_update;
 		struct time_sync_format ts_format;
 
-		get_monotonic_boottime(&ts_system);
-		do_gettimeofday(&tv_utc);
-		usec_system = (timespec_to_ns(&ts_system)) / NSEC_PER_USEC;
-		usec_utc = (uint64_t)tv_utc.tv_sec * 1000000 +
-						((uint32_t)tv_utc.tv_usec);
+		usec_system = ktime_to_us(ktime_get_boottime());
+		usec_utc = ktime_to_us(ktime_get_real());
 		ts_format.ts1_source = HOST_SYSTEM_TIME_USEC;
 		ts_format.ts2_source = HOST_UTC_TIME_USEC;
 		ts_format.reserved = 0;
@@ -575,15 +570,13 @@ static void fw_reset_work_fn(struct work_struct *unused)
 static void _ish_sync_fw_clock(struct ishtp_device *dev)
 {
 	static unsigned long	prev_sync;
-	struct timespec	ts;
 	uint64_t	usec;
 
 	if (prev_sync && jiffies - prev_sync < 20 * HZ)
 		return;
 
 	prev_sync = jiffies;
-	get_monotonic_boottime(&ts);
-	usec = (timespec_to_ns(&ts)) / NSEC_PER_USEC;
+	usec = ktime_to_us(ktime_get_boottime());
 	ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &usec, sizeof(uint64_t));
 }
 

diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
index 8df81dc..20d824f 100644
--- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c
+++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c

@@ -35,6 +35,8 @@ static const struct pci_device_id ish_pci_tbl[] = {
 	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, BXT_Bx_DEVICE_ID)},
 	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, APL_Ax_DEVICE_ID)},
 	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, SPT_Ax_DEVICE_ID)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, CNL_Ax_DEVICE_ID)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, GLK_Ax_DEVICE_ID)},
 	{0, }
 };
 MODULE_DEVICE_TABLE(pci, ish_pci_tbl);

diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
index 5c643d7a..157b44a 100644
--- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c
+++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c

@@ -136,10 +136,9 @@ static void process_recv(struct ishtp_cl *hid_ishtp_cl, void *recv_buf,
 				if (1 + sizeof(struct device_info) * i >=
 						payload_len) {
 					dev_err(&client_data->cl_device->dev,
-						"[hid-ish]: [ENUM_DEVICES]: content size %lu is bigger than payload_len %u\n",
+						"[hid-ish]: [ENUM_DEVICES]: content size %zu is bigger than payload_len %zu\n",
 						1 + sizeof(struct device_info)
-						* i,
-						(unsigned int)payload_len);
+						* i, payload_len);
 				}
 
 				if (1 + sizeof(struct device_info) * i >=

diff --git a/drivers/hid/intel-ish-hid/ishtp/client.c b/drivers/hid/intel-ish-hid/ishtp/client.c
index aad6132..007443e 100644
--- a/drivers/hid/intel-ish-hid/ishtp/client.c
+++ b/drivers/hid/intel-ish-hid/ishtp/client.c

@@ -803,7 +803,7 @@ void ishtp_cl_send_msg(struct ishtp_device *dev, struct ishtp_cl *cl)
  * @ishtp_hdr: Pointer to message header
  *
  * Receive and dispatch ISHTP client messages. This function executes in ISR
- * context
+ * or work queue context
  */
 void recv_ishtp_cl_msg(struct ishtp_device *dev,
 		       struct ishtp_msg_hdr *ishtp_hdr)
@@ -813,7 +813,6 @@ void recv_ishtp_cl_msg(struct ishtp_device *dev,
 	struct ishtp_cl_rb *new_rb;
 	unsigned char *buffer = NULL;
 	struct ishtp_cl_rb *complete_rb = NULL;
-	unsigned long	dev_flags;
 	unsigned long	flags;
 	int	rb_count;
 
@@ -828,7 +827,7 @@ void recv_ishtp_cl_msg(struct ishtp_device *dev,
 		goto	eoi;
 	}
 
-	spin_lock_irqsave(&dev->read_list_spinlock, dev_flags);
+	spin_lock_irqsave(&dev->read_list_spinlock, flags);
 	rb_count = -1;
 	list_for_each_entry(rb, &dev->read_list.list, list) {
 		++rb_count;
@@ -840,8 +839,7 @@ void recv_ishtp_cl_msg(struct ishtp_device *dev,
 
 		 /* If no Rx buffer is allocated, disband the rb */
 		if (rb->buffer.size == 0 || rb->buffer.data == NULL) {
-			spin_unlock_irqrestore(&dev->read_list_spinlock,
-				dev_flags);
+			spin_unlock_irqrestore(&dev->read_list_spinlock, flags);
 			dev_err(&cl->device->dev,
 				"Rx buffer is not allocated.\n");
 			list_del(&rb->list);
@@ -857,8 +855,7 @@ void recv_ishtp_cl_msg(struct ishtp_device *dev,
 		 * back FC, so communication will be stuck anyway)
 		 */
 		if (rb->buffer.size < ishtp_hdr->length + rb->buf_idx) {
-			spin_unlock_irqrestore(&dev->read_list_spinlock,
-				dev_flags);
+			spin_unlock_irqrestore(&dev->read_list_spinlock, flags);
 			dev_err(&cl->device->dev,
 				"message overflow. size %d len %d idx %ld\n",
 				rb->buffer.size, ishtp_hdr->length,
@@ -884,14 +881,13 @@ void recv_ishtp_cl_msg(struct ishtp_device *dev,
 			 * the whole msg arrived, send a new FC, and add a new
 			 * rb buffer for the next coming msg
 			 */
-			spin_lock_irqsave(&cl->free_list_spinlock, flags);
+			spin_lock(&cl->free_list_spinlock);
 
 			if (!list_empty(&cl->free_rb_list.list)) {
 				new_rb = list_entry(cl->free_rb_list.list.next,
 					struct ishtp_cl_rb, list);
 				list_del_init(&new_rb->list);
-				spin_unlock_irqrestore(&cl->free_list_spinlock,
-					flags);
+				spin_unlock(&cl->free_list_spinlock);
 				new_rb->cl = cl;
 				new_rb->buf_idx = 0;
 				INIT_LIST_HEAD(&new_rb->list);
@@ -900,8 +896,7 @@ void recv_ishtp_cl_msg(struct ishtp_device *dev,
 
 				ishtp_hbm_cl_flow_control_req(dev, cl);
 			} else {
-				spin_unlock_irqrestore(&cl->free_list_spinlock,
-					flags);
+				spin_unlock(&cl->free_list_spinlock);
 			}
 		}
 		/* One more fragment in message (even if this was last) */
@@ -914,7 +909,7 @@ void recv_ishtp_cl_msg(struct ishtp_device *dev,
 		break;
 	}
 
-	spin_unlock_irqrestore(&dev->read_list_spinlock, dev_flags);
+	spin_unlock_irqrestore(&dev->read_list_spinlock, flags);
 	/* If it's nobody's message, just read and discard it */
 	if (!buffer) {
 		uint8_t	rd_msg_buf[ISHTP_RD_MSG_BUF_SIZE];
@@ -925,7 +920,8 @@ void recv_ishtp_cl_msg(struct ishtp_device *dev,
 	}
 
 	if (complete_rb) {
-		getnstimeofday(&cl->ts_rx);
+		cl = complete_rb->cl;
+		cl->ts_rx = ktime_get();
 		++cl->recv_msg_cnt_ipc;
 		ishtp_cl_read_complete(complete_rb);
 	}
@@ -940,7 +936,7 @@ void recv_ishtp_cl_msg(struct ishtp_device *dev,
  * @hbm: hbm buffer
  *
  * Receive and dispatch ISHTP client messages using DMA. This function executes
- * in ISR context
+ * in ISR or work queue context
  */
 void recv_ishtp_cl_msg_dma(struct ishtp_device *dev, void *msg,
 			   struct dma_xfer_hbm *hbm)
@@ -950,10 +946,10 @@ void recv_ishtp_cl_msg_dma(struct ishtp_device *dev, void *msg,
 	struct ishtp_cl_rb *new_rb;
 	unsigned char *buffer = NULL;
 	struct ishtp_cl_rb *complete_rb = NULL;
-	unsigned long	dev_flags;
 	unsigned long	flags;
 
-	spin_lock_irqsave(&dev->read_list_spinlock, dev_flags);
+	spin_lock_irqsave(&dev->read_list_spinlock, flags);
+
 	list_for_each_entry(rb, &dev->read_list.list, list) {
 		cl = rb->cl;
 		if (!cl || !(cl->host_client_id == hbm->host_client_id &&
@@ -965,8 +961,7 @@ void recv_ishtp_cl_msg_dma(struct ishtp_device *dev, void *msg,
 		 * If no Rx buffer is allocated, disband the rb
 		 */
 		if (rb->buffer.size == 0 || rb->buffer.data == NULL) {
-			spin_unlock_irqrestore(&dev->read_list_spinlock,
-				dev_flags);
+			spin_unlock_irqrestore(&dev->read_list_spinlock, flags);
 			dev_err(&cl->device->dev,
 				"response buffer is not allocated.\n");
 			list_del(&rb->list);
@@ -982,8 +977,7 @@ void recv_ishtp_cl_msg_dma(struct ishtp_device *dev, void *msg,
 		 * back FC, so communication will be stuck anyway)
 		 */
 		if (rb->buffer.size < hbm->msg_length) {
-			spin_unlock_irqrestore(&dev->read_list_spinlock,
-				dev_flags);
+			spin_unlock_irqrestore(&dev->read_list_spinlock, flags);
 			dev_err(&cl->device->dev,
 				"message overflow. size %d len %d idx %ld\n",
 				rb->buffer.size, hbm->msg_length, rb->buf_idx);
@@ -1007,14 +1001,13 @@ void recv_ishtp_cl_msg_dma(struct ishtp_device *dev, void *msg,
 		 * the whole msg arrived, send a new FC, and add a new
 		 * rb buffer for the next coming msg
 		 */
-		spin_lock_irqsave(&cl->free_list_spinlock, flags);
+		spin_lock(&cl->free_list_spinlock);
 
 		if (!list_empty(&cl->free_rb_list.list)) {
 			new_rb = list_entry(cl->free_rb_list.list.next,
 				struct ishtp_cl_rb, list);
 			list_del_init(&new_rb->list);
-			spin_unlock_irqrestore(&cl->free_list_spinlock,
-				flags);
+			spin_unlock(&cl->free_list_spinlock);
 			new_rb->cl = cl;
 			new_rb->buf_idx = 0;
 			INIT_LIST_HEAD(&new_rb->list);
@@ -1023,8 +1016,7 @@ void recv_ishtp_cl_msg_dma(struct ishtp_device *dev, void *msg,
 
 			ishtp_hbm_cl_flow_control_req(dev, cl);
 		} else {
-			spin_unlock_irqrestore(&cl->free_list_spinlock,
-				flags);
+			spin_unlock(&cl->free_list_spinlock);
 		}
 
 		/* One more fragment in message (this is always last) */
@@ -1037,7 +1029,7 @@ void recv_ishtp_cl_msg_dma(struct ishtp_device *dev, void *msg,
 		break;
 	}
 
-	spin_unlock_irqrestore(&dev->read_list_spinlock, dev_flags);
+	spin_unlock_irqrestore(&dev->read_list_spinlock, flags);
 	/* If it's nobody's message, just read and discard it */
 	if (!buffer) {
 		dev_err(dev->devc, "Dropped Rx (DMA) msg - no request\n");
@@ -1045,7 +1037,8 @@ void recv_ishtp_cl_msg_dma(struct ishtp_device *dev, void *msg,
 	}
 
 	if (complete_rb) {
-		getnstimeofday(&cl->ts_rx);
+		cl = complete_rb->cl;
+		cl->ts_rx = ktime_get();
 		++cl->recv_msg_cnt_dma;
 		ishtp_cl_read_complete(complete_rb);
 	}

diff --git a/drivers/hid/intel-ish-hid/ishtp/client.h b/drivers/hid/intel-ish-hid/ishtp/client.h
index 444d069..79eade5 100644
--- a/drivers/hid/intel-ish-hid/ishtp/client.h
+++ b/drivers/hid/intel-ish-hid/ishtp/client.h

@@ -118,9 +118,9 @@ struct ishtp_cl {
 	unsigned int	out_flow_ctrl_cnt;
 
 	/* Rx msg ... out FC timing */
-	struct timespec ts_rx;
-	struct timespec ts_out_fc;
-	struct timespec ts_max_fc_delay;
+	ktime_t ts_rx;
+	ktime_t ts_out_fc;
+	ktime_t ts_max_fc_delay;
 	void *client_data;
 };
 

diff --git a/drivers/hid/intel-ish-hid/ishtp/hbm.c b/drivers/hid/intel-ish-hid/ishtp/hbm.c
index b721360..ae4a69f 100644
--- a/drivers/hid/intel-ish-hid/ishtp/hbm.c
+++ b/drivers/hid/intel-ish-hid/ishtp/hbm.c

@@ -321,13 +321,10 @@ int ishtp_hbm_cl_flow_control_req(struct ishtp_device *dev,
 	if (!rv) {
 		++cl->out_flow_ctrl_creds;
 		++cl->out_flow_ctrl_cnt;
-		getnstimeofday(&cl->ts_out_fc);
-		if (cl->ts_rx.tv_sec && cl->ts_rx.tv_nsec) {
-			struct timespec ts_diff;
-
-			ts_diff = timespec_sub(cl->ts_out_fc, cl->ts_rx);
-			if (timespec_compare(&ts_diff, &cl->ts_max_fc_delay)
-					> 0)
+		cl->ts_out_fc = ktime_get();
+		if (cl->ts_rx) {
+			ktime_t ts_diff = ktime_sub(cl->ts_out_fc, cl->ts_rx);
+			if (ktime_after(ts_diff, cl->ts_max_fc_delay))
 				cl->ts_max_fc_delay = ts_diff;
 		}
 	} else {

diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 83772fa..76013eb 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c

@@ -70,8 +70,6 @@ MODULE_PARM_DESC(quirks, "Add/modify USB HID quirks by specifying "
 /*
  * Input submission and I/O error handler.
  */
-static DEFINE_MUTEX(hid_open_mut);
-
 static void hid_io_error(struct hid_device *hid);
 static int hid_submit_out(struct hid_device *hid);
 static int hid_submit_ctrl(struct hid_device *hid);
@@ -85,10 +83,10 @@ static int hid_start_in(struct hid_device *hid)
 	struct usbhid_device *usbhid = hid->driver_data;
 
 	spin_lock_irqsave(&usbhid->lock, flags);
-	if ((hid->open > 0 || hid->quirks & HID_QUIRK_ALWAYS_POLL) &&
-			!test_bit(HID_DISCONNECTED, &usbhid->iofl) &&
-			!test_bit(HID_SUSPENDED, &usbhid->iofl) &&
-			!test_and_set_bit(HID_IN_RUNNING, &usbhid->iofl)) {
+	if (test_bit(HID_IN_POLLING, &usbhid->iofl) &&
+	    !test_bit(HID_DISCONNECTED, &usbhid->iofl) &&
+	    !test_bit(HID_SUSPENDED, &usbhid->iofl) &&
+	    !test_and_set_bit(HID_IN_RUNNING, &usbhid->iofl)) {
 		rc = usb_submit_urb(usbhid->urbin, GFP_ATOMIC);
 		if (rc != 0) {
 			clear_bit(HID_IN_RUNNING, &usbhid->iofl);
@@ -272,13 +270,13 @@ static int usbhid_restart_ctrl_queue(struct usbhid_device *usbhid)
 static void hid_irq_in(struct urb *urb)
 {
 	struct hid_device	*hid = urb->context;
-	struct usbhid_device 	*usbhid = hid->driver_data;
+	struct usbhid_device	*usbhid = hid->driver_data;
 	int			status;
 
 	switch (urb->status) {
 	case 0:			/* success */
 		usbhid->retry_delay = 0;
-		if ((hid->quirks & HID_QUIRK_ALWAYS_POLL) && !hid->open)
+		if (!test_bit(HID_OPENED, &usbhid->iofl))
 			break;
 		usbhid_mark_busy(usbhid);
 		if (!test_bit(HID_RESUME_RUNNING, &usbhid->iofl)) {
@@ -677,73 +675,92 @@ static int hid_get_class_descriptor(struct usb_device *dev, int ifnum,
 	return result;
 }
 
-int usbhid_open(struct hid_device *hid)
+/*
+ * Start delivering input reports from the device to the HID core.
+ *
+ * HID_OPENED is set even for HID_QUIRK_ALWAYS_POLL devices: their IN URB
+ * is already being polled, but hid_irq_in() drops every report while
+ * HID_OPENED is clear.
+ *
+ * Returns 0 on success, -EIO if the interface cannot be resumed, or
+ * -EBUSY if the IN URB cannot be submitted for lack of resources.
+ */
+static int usbhid_open(struct hid_device *hid)
 {
 	struct usbhid_device *usbhid = hid->driver_data;
-	int res = 0;
+	int res;
 
-	mutex_lock(&hid_open_mut);
-	if (!hid->open++) {
-		res = usb_autopm_get_interface(usbhid->intf);
-		/* the device must be awake to reliably request remote wakeup */
-		if (res < 0) {
-			hid->open--;
-			res = -EIO;
-			goto done;
-		}
-		usbhid->intf->needs_remote_wakeup = 1;
-		set_bit(HID_RESUME_RUNNING, &usbhid->iofl);
-		res = hid_start_in(hid);
-		if (res) {
-			if (res != -ENOSPC) {
-				hid_io_error(hid);
-				res = 0;
-			} else {
-				/* no use opening if resources are insufficient */
-				hid->open--;
-				res = -EBUSY;
-				usbhid->intf->needs_remote_wakeup = 0;
-			}
-		}
-		usb_autopm_put_interface(usbhid->intf);
+	set_bit(HID_OPENED, &usbhid->iofl);
+
+	if (hid->quirks & HID_QUIRK_ALWAYS_POLL)
+		return 0;
 
-		/*
-		 * In case events are generated while nobody was listening,
-		 * some are released when the device is re-opened.
-		 * Wait 50 msec for the queue to empty before allowing events
-		 * to go through hid.
-		 */
-		if (res == 0 && !(hid->quirks & HID_QUIRK_ALWAYS_POLL))
-			msleep(50);
-		clear_bit(HID_RESUME_RUNNING, &usbhid->iofl);
+	res = usb_autopm_get_interface(usbhid->intf);
+	/* the device must be awake to reliably request remote wakeup */
+	if (res < 0) {
+		clear_bit(HID_OPENED, &usbhid->iofl);
+		return -EIO;
+	}
+
+	usbhid->intf->needs_remote_wakeup = 1;
+
+	set_bit(HID_RESUME_RUNNING, &usbhid->iofl);
+	set_bit(HID_IN_POLLING, &usbhid->iofl);
+
+	res = hid_start_in(hid);
+	if (res) {
+		if (res != -ENOSPC) {
+			hid_io_error(hid);
+			res = 0;
+		} else {
+			/* no use opening if resources are insufficient */
+			res = -EBUSY;
+			clear_bit(HID_OPENED, &usbhid->iofl);
+			clear_bit(HID_IN_POLLING, &usbhid->iofl);
+			usbhid->intf->needs_remote_wakeup = 0;
+		}
 	}
-done:
-	mutex_unlock(&hid_open_mut);
+
+	usb_autopm_put_interface(usbhid->intf);
+
+	/*
+	 * In case events are generated while nobody was listening,
+	 * some are released when the device is re-opened.
+	 * Wait 50 msec for the queue to empty before allowing events
+	 * to go through hid.
+	 */
+	if (res == 0)
+		msleep(50);
+
+	clear_bit(HID_RESUME_RUNNING, &usbhid->iofl);
 	return res;
 }
 
-void usbhid_close(struct hid_device *hid)
+/*
+ * Stop delivering input reports to the HID core. For devices without
+ * HID_QUIRK_ALWAYS_POLL this also stops polling the IN endpoint.
+ */
+static void usbhid_close(struct hid_device *hid)
 {
 	struct usbhid_device *usbhid = hid->driver_data;
 
-	mutex_lock(&hid_open_mut);
-
-	/* protecting hid->open to make sure we don't restart
-	 * data acquistion due to a resumption we no longer
-	 * care about
+	/*
+	 * Make sure we don't restart data acquisition due to
+	 * a resumption we no longer care about by avoiding racing
+	 * with hid_start_in().
 	 */
 	spin_lock_irq(&usbhid->lock);
-	if (!--hid->open) {
-		spin_unlock_irq(&usbhid->lock);
-		hid_cancel_delayed_stuff(usbhid);
-		if (!(hid->quirks & HID_QUIRK_ALWAYS_POLL)) {
-			usb_kill_urb(usbhid->urbin);
-			usbhid->intf->needs_remote_wakeup = 0;
-		}
-	} else {
-		spin_unlock_irq(&usbhid->lock);
-	}
-	mutex_unlock(&hid_open_mut);
+	clear_bit(HID_OPENED, &usbhid->iofl);
+	if (!(hid->quirks & HID_QUIRK_ALWAYS_POLL))
+		clear_bit(HID_IN_POLLING, &usbhid->iofl);
+	spin_unlock_irq(&usbhid->lock);
+
+	if (hid->quirks & HID_QUIRK_ALWAYS_POLL)
+		return;
+
+	hid_cancel_delayed_stuff(usbhid);
+	usb_kill_urb(usbhid->urbin);
+	usbhid->intf->needs_remote_wakeup = 0;
 }
 
 /*
@@ -1135,6 +1134,7 @@ static int usbhid_start(struct hid_device *hid)
 		ret = usb_autopm_get_interface(usbhid->intf);
 		if (ret)
 			goto fail;
+		set_bit(HID_IN_POLLING, &usbhid->iofl);
 		usbhid->intf->needs_remote_wakeup = 1;
 		ret = hid_start_in(hid);
 		if (ret) {
@@ -1176,8 +1176,10 @@ static void usbhid_stop(struct hid_device *hid)
 	if (WARN_ON(!usbhid))
 		return;
 
-	if (hid->quirks & HID_QUIRK_ALWAYS_POLL)
+	if (hid->quirks & HID_QUIRK_ALWAYS_POLL) {
+		clear_bit(HID_IN_POLLING, &usbhid->iofl);
 		usbhid->intf->needs_remote_wakeup = 0;
+	}
 
 	clear_bit(HID_STARTED, &usbhid->iofl);
 	spin_lock_irq(&usbhid->lock);	/* Sync with error and led handlers */
@@ -1203,16 +1205,19 @@ static void usbhid_stop(struct hid_device *hid)
 
 static int usbhid_power(struct hid_device *hid, int lvl)
 {
+	struct usbhid_device *usbhid = hid->driver_data;
 	int r = 0;
 
 	switch (lvl) {
 	case PM_HINT_FULLON:
-		r = usbhid_get_power(hid);
+		r = usb_autopm_get_interface(usbhid->intf);
 		break;
+
 	case PM_HINT_NORMAL:
-		usbhid_put_power(hid);
+		usb_autopm_put_interface(usbhid->intf);
 		break;
 	}
+
 	return r;
 }
 
@@ -1492,21 +1497,6 @@ static int hid_post_reset(struct usb_interface *intf)
 	return 0;
 }
 
-int usbhid_get_power(struct hid_device *hid)
-{
-	struct usbhid_device *usbhid = hid->driver_data;
-
-	return usb_autopm_get_interface(usbhid->intf);
-}
-
-void usbhid_put_power(struct hid_device *hid)
-{
-	struct usbhid_device *usbhid = hid->driver_data;
-
-	usb_autopm_put_interface(usbhid->intf);
-}
-
-
 #ifdef CONFIG_PM
 static int hid_resume_common(struct hid_device *hid, bool driver_suspended)
 {

diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index 0e06368..7d749b1 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c

@@ -237,8 +237,8 @@ static int hiddev_release(struct inode * inode, struct file * file)
 	mutex_lock(&list->hiddev->existancelock);
 	if (!--list->hiddev->open) {
 		if (list->hiddev->exist) {
-			usbhid_close(list->hiddev->hid);
-			usbhid_put_power(list->hiddev->hid);
+			hid_hw_close(list->hiddev->hid);
+			hid_hw_power(list->hiddev->hid, PM_HINT_NORMAL);
 		} else {
 			mutex_unlock(&list->hiddev->existancelock);
 			kfree(list->hiddev);
@@ -282,11 +282,9 @@ static int hiddev_open(struct inode *inode, struct file *file)
 	 */
 	if (list->hiddev->exist) {
 		if (!list->hiddev->open++) {
-			res = usbhid_open(hiddev->hid);
-			if (res < 0) {
-				res = -EIO;
+			res = hid_hw_open(hiddev->hid);
+			if (res < 0)
 				goto bail;
-			}
 		}
 	} else {
 		res = -ENODEV;
@@ -301,15 +299,17 @@ static int hiddev_open(struct inode *inode, struct file *file)
 	if (!list->hiddev->open++)
 		if (list->hiddev->exist) {
 			struct hid_device *hid = hiddev->hid;
-			res = usbhid_get_power(hid);
-			if (res < 0) {
-				res = -EIO;
+			res = hid_hw_power(hid, PM_HINT_FULLON);
+			if (res < 0)
 				goto bail_unlock;
-			}
-			usbhid_open(hid);
+			res = hid_hw_open(hid);
+			if (res < 0)
+				goto bail_normal_power;
 		}
 	mutex_unlock(&hiddev->existancelock);
 	return 0;
+bail_normal_power:
+	hid_hw_power(hid, PM_HINT_NORMAL);
 bail_unlock:
 	mutex_unlock(&hiddev->existancelock);
 bail:
@@ -935,7 +935,7 @@ void hiddev_disconnect(struct hid_device *hid)
 
 	if (hiddev->open) {
 		mutex_unlock(&hiddev->existancelock);
-		usbhid_close(hiddev->hid);
+		hid_hw_close(hiddev->hid);
 		wake_up_interruptible(&hiddev->wait);
 	} else {
 		mutex_unlock(&hiddev->existancelock);

diff --git a/drivers/hid/usbhid/usbhid.h b/drivers/hid/usbhid/usbhid.h
index fa47d66..da9c61d 100644
--- a/drivers/hid/usbhid/usbhid.h
+++ b/drivers/hid/usbhid/usbhid.h

@@ -34,11 +34,7 @@
 #include <linux/input.h>
 
 /*  API provided by hid-core.c for USB HID drivers */
-void usbhid_close(struct hid_device *hid);
-int usbhid_open(struct hid_device *hid);
 void usbhid_init_reports(struct hid_device *hid);
-int usbhid_get_power(struct hid_device *hid);
-void usbhid_put_power(struct hid_device *hid);
 struct usb_interface *usbhid_find_interface(int minor);
 
 /* iofl flags */
@@ -53,6 +49,17 @@ struct usb_interface *usbhid_find_interface(int minor);
 #define HID_KEYS_PRESSED	10
 #define HID_NO_BANDWIDTH	11
 #define HID_RESUME_RUNNING	12
+/*
+ * The device is opened, meaning there is a client that is interested
+ * in data coming from the device.
+ */
+#define HID_OPENED		13
+/*
+ * We are polling input endpoint by [re]submitting IN URB, because
+ * either HID device is opened or ALWAYS POLL quirk is set for the
+ * device.
+ */
+#define HID_IN_POLLING		14
 
 /*
  * USB-specific HID struct, to be pointed to

diff --git a/drivers/hid/wacom.h b/drivers/hid/wacom.h
index c7b9ab1..3c37c3c 100644
--- a/drivers/hid/wacom.h
+++ b/drivers/hid/wacom.h

@@ -138,6 +138,7 @@ struct wacom_battery {
 	struct power_supply_desc bat_desc;
 	struct power_supply *battery;
 	char bat_name[WACOM_NAME_MAX];
+	int bat_status;
 	int battery_capacity;
 	int bat_charging;
 	int bat_connected;

diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index 0022c0d..838c1eb 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c

@@ -1547,7 +1547,9 @@ static int wacom_battery_get_property(struct power_supply *psy,
 			val->intval = battery->battery_capacity;
 			break;
 		case POWER_SUPPLY_PROP_STATUS:
-			if (battery->bat_charging)
+			if (battery->bat_status != WACOM_POWER_SUPPLY_STATUS_AUTO)
+				val->intval = battery->bat_status;
+			else if (battery->bat_charging)
 				val->intval = POWER_SUPPLY_STATUS_CHARGING;
 			else if (battery->battery_capacity == 100 &&
 				    battery->ps_connected)

diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index e274c9d..9f94029 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c

@@ -57,15 +57,18 @@ static unsigned short batcap_gr[8] = { 1, 15, 25, 35, 50, 70, 100, 100 };
 static unsigned short batcap_i4[8] = { 1, 15, 30, 45, 60, 70, 85, 100 };
 
 static void __wacom_notify_battery(struct wacom_battery *battery,
-				   int bat_capacity, bool bat_charging,
-				   bool bat_connected, bool ps_connected)
+				   int bat_status, int bat_capacity,
+				   bool bat_charging, bool bat_connected,
+				   bool ps_connected)
 {
-	bool changed = battery->battery_capacity != bat_capacity  ||
+	bool changed = battery->bat_status       != bat_status    ||
+		       battery->battery_capacity != bat_capacity  ||
 		       battery->bat_charging     != bat_charging  ||
 		       battery->bat_connected    != bat_connected ||
 		       battery->ps_connected     != ps_connected;
 
 	if (changed) {
+		battery->bat_status = bat_status;
 		battery->battery_capacity = bat_capacity;
 		battery->bat_charging = bat_charging;
 		battery->bat_connected = bat_connected;
@@ -77,13 +80,13 @@ static void __wacom_notify_battery(struct wacom_battery *battery,
 }
 
 static void wacom_notify_battery(struct wacom_wac *wacom_wac,
-	int bat_capacity, bool bat_charging, bool bat_connected,
-	bool ps_connected)
+	int bat_status, int bat_capacity, bool bat_charging,
+	bool bat_connected, bool ps_connected)
 {
 	struct wacom *wacom = container_of(wacom_wac, struct wacom, wacom_wac);
 
-	__wacom_notify_battery(&wacom->battery, bat_capacity, bat_charging,
-			       bat_connected, ps_connected);
+	__wacom_notify_battery(&wacom->battery, bat_status, bat_capacity,
+			       bat_charging, bat_connected, ps_connected);
 }
 
 static int wacom_penpartner_irq(struct wacom_wac *wacom)
@@ -448,8 +451,9 @@ static int wacom_graphire_irq(struct wacom_wac *wacom)
 		rw = (data[7] >> 2 & 0x07);
 		battery_capacity = batcap_gr[rw];
 		ps_connected = rw == 7;
-		wacom_notify_battery(wacom, battery_capacity, ps_connected,
-				     1, ps_connected);
+		wacom_notify_battery(wacom, WACOM_POWER_SUPPLY_STATUS_AUTO,
+				     battery_capacity, ps_connected, 1,
+				     ps_connected);
 	}
 exit:
 	return retval;
@@ -1071,7 +1075,8 @@ static int wacom_remote_irq(struct wacom_wac *wacom_wac, size_t len)
 			wacom->led.groups[i].select = touch_ring_mode;
 	}
 
-	__wacom_notify_battery(&remote->remotes[index].battery, bat_percent,
+	__wacom_notify_battery(&remote->remotes[index].battery,
+				WACOM_POWER_SUPPLY_STATUS_AUTO, bat_percent,
 				bat_charging, 1, bat_charging);
 
 out:
@@ -1157,7 +1162,8 @@ static int wacom_intuos_bt_irq(struct wacom_wac *wacom, size_t len)
 		bat_charging = (power_raw & 0x08) ? 1 : 0;
 		ps_connected = (power_raw & 0x10) ? 1 : 0;
 		battery_capacity = batcap_i4[power_raw & 0x07];
-		wacom_notify_battery(wacom, battery_capacity, bat_charging,
+		wacom_notify_battery(wacom, WACOM_POWER_SUPPLY_STATUS_AUTO,
+				     battery_capacity, bat_charging,
 				     battery_capacity || bat_charging,
 				     ps_connected);
 		break;
@@ -1334,7 +1340,8 @@ static void wacom_intuos_pro2_bt_battery(struct wacom_wac *wacom)
 	bool chg = data[284] & 0x80;
 	int battery_status = data[284] & 0x7F;
 
-	wacom_notify_battery(wacom, battery_status, chg, 1, chg);
+	wacom_notify_battery(wacom, WACOM_POWER_SUPPLY_STATUS_AUTO,
+			     battery_status, chg, 1, chg);
 }
 
 static int wacom_intuos_pro2_bt_irq(struct wacom_wac *wacom, size_t len)
@@ -1696,6 +1703,82 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage,
 	}
 }
 
+/*
+ * Mapping hook for battery usages: when the usage (after translation by
+ * wacom_equivalent_usage()) is a battery strength, battery level or battery
+ * charging usage, flag the device with WACOM_QUIRK_BATTERY.
+ */
+static void wacom_wac_battery_usage_mapping(struct hid_device *hdev,
+		struct hid_field *field, struct hid_usage *usage)
+{
+	struct wacom *wacom = hid_get_drvdata(hdev);
+	unsigned equiv = wacom_equivalent_usage(usage->hid);
+
+	/* Anything that is not a battery usage leaves the quirks alone. */
+	if (equiv != HID_DG_BATTERYSTRENGTH &&
+	    equiv != WACOM_HID_WD_BATTERY_LEVEL &&
+	    equiv != WACOM_HID_WD_BATTERY_CHARGING)
+		return;
+
+	wacom->wacom_wac.features.quirks |= WACOM_QUIRK_BATTERY;
+}
+
+/*
+ * Record one battery-related usage value in wacom_wac->hid_data.
+ *
+ * Strength and level values are rescaled to a percentage using the field's
+ * logical range. A battery-strength value of 0 only marks the status as
+ * POWER_SUPPLY_STATUS_UNKNOWN; capacity and bat_connected are left alone.
+ * The charging usage is mirrored into both bat_charging and ps_connected.
+ * Usages other than the three battery usages are ignored.
+ */
+static void wacom_wac_battery_event(struct hid_device *hdev, struct hid_field *field,
+		struct hid_usage *usage, __s32 value)
+{
+	struct wacom *wacom = hid_get_drvdata(hdev);
+	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
+	unsigned equivalent_usage = wacom_equivalent_usage(usage->hid);
+	__s32 range = field->logical_maximum - field->logical_minimum;
+
+	switch (equivalent_usage) {
+	case HID_DG_BATTERYSTRENGTH:
+		if (value == 0) {
+			wacom_wac->hid_data.bat_status = POWER_SUPPLY_STATUS_UNKNOWN;
+			break;
+		}
+		/* fallthrough: a non-zero strength is handled like a level */
+	case WACOM_HID_WD_BATTERY_LEVEL:
+		/*
+		 * The logical range comes from the device's report
+		 * descriptor. A descriptor with maximum == minimum would
+		 * make the scaling below divide by zero, so such a value is
+		 * dropped instead.
+		 */
+		if (range == 0)
+			break;
+		wacom_wac->hid_data.battery_capacity = value * 100 / range;
+		wacom_wac->hid_data.bat_connected = 1;
+		wacom_wac->hid_data.bat_status = WACOM_POWER_SUPPLY_STATUS_AUTO;
+		break;
+	case WACOM_HID_WD_BATTERY_CHARGING:
+		wacom_wac->hid_data.bat_charging = value;
+		wacom_wac->hid_data.ps_connected = value;
+		wacom_wac->hid_data.bat_connected = 1;
+		wacom_wac->hid_data.bat_status = WACOM_POWER_SUPPLY_STATUS_AUTO;
+		break;
+	}
+}
+
+/* Pre-report hook for battery handling; it currently does nothing. */
+static void wacom_wac_battery_pre_report(struct hid_device *hdev,
+		struct hid_report *report)
+{
+	/* intentionally empty */
+}
+
+/*
+ * Forward the battery state accumulated in hid_data to
+ * wacom_notify_battery(). Does nothing unless WACOM_QUIRK_BATTERY is set in
+ * the device's feature quirks.
+ */
+static void wacom_wac_battery_report(struct hid_device *hdev,
+		struct hid_report *report)
+{
+	struct wacom *wacom = hid_get_drvdata(hdev);
+	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
+	struct hid_data *bat = &wacom_wac->hid_data;
+
+	if (!(wacom_wac->features.quirks & WACOM_QUIRK_BATTERY))
+		return;
+
+	wacom_notify_battery(wacom_wac, bat->bat_status, bat->battery_capacity,
+			     bat->bat_charging, bat->bat_connected,
+			     bat->ps_connected);
+}
+
 static void wacom_wac_pad_usage_mapping(struct hid_device *hdev,
 		struct hid_field *field, struct hid_usage *usage)
 {
@@ -1706,10 +1789,6 @@ static void wacom_wac_pad_usage_mapping(struct hid_device *hdev,
 	unsigned equivalent_usage = wacom_equivalent_usage(usage->hid);
 
 	switch (equivalent_usage) {
-	case WACOM_HID_WD_BATTERY_LEVEL:
-	case WACOM_HID_WD_BATTERY_CHARGING:
-		features->quirks |= WACOM_QUIRK_BATTERY;
-		break;
 	case WACOM_HID_WD_ACCELEROMETER_X:
 		__set_bit(INPUT_PROP_ACCELEROMETER, input->propbit);
 		wacom_map_usage(input, usage, field, EV_ABS, ABS_X, 0);
@@ -1803,27 +1882,6 @@ static void wacom_wac_pad_usage_mapping(struct hid_device *hdev,
 	}
 }
 
-static void wacom_wac_pad_battery_event(struct hid_device *hdev, struct hid_field *field,
-		struct hid_usage *usage, __s32 value)
-{
-	struct wacom *wacom = hid_get_drvdata(hdev);
-	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
-	unsigned equivalent_usage = wacom_equivalent_usage(usage->hid);
-
-	switch (equivalent_usage) {
-	case WACOM_HID_WD_BATTERY_LEVEL:
-		wacom_wac->hid_data.battery_capacity = value;
-		wacom_wac->hid_data.bat_connected = 1;
-		break;
-
-	case WACOM_HID_WD_BATTERY_CHARGING:
-		wacom_wac->hid_data.bat_charging = value;
-		wacom_wac->hid_data.ps_connected = value;
-		wacom_wac->hid_data.bat_connected = 1;
-		break;
-	}
-}
-
 static void wacom_wac_pad_event(struct hid_device *hdev, struct hid_field *field,
 		struct hid_usage *usage, __s32 value)
 {
@@ -1897,24 +1955,6 @@ static void wacom_wac_pad_pre_report(struct hid_device *hdev,
 	wacom_wac->hid_data.inrange_state = 0;
 }
 
-static void wacom_wac_pad_battery_report(struct hid_device *hdev,
-		struct hid_report *report)
-{
-	struct wacom *wacom = hid_get_drvdata(hdev);
-	struct wacom_wac *wacom_wac = &wacom->wacom_wac;
-	struct wacom_features *features = &wacom_wac->features;
-
-	if (features->quirks & WACOM_QUIRK_BATTERY) {
-		int capacity = wacom_wac->hid_data.battery_capacity;
-		bool charging = wacom_wac->hid_data.bat_charging;
-		bool connected = wacom_wac->hid_data.bat_connected;
-		bool powered = wacom_wac->hid_data.ps_connected;
-
-		wacom_notify_battery(wacom_wac, capacity, charging,
-				     connected, powered);
-	}
-}
-
 static void wacom_wac_pad_report(struct hid_device *hdev,
 		struct hid_report *report)
 {
@@ -1960,9 +2000,6 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev,
 	case HID_DG_INRANGE:
 		wacom_map_usage(input, usage, field, EV_KEY, BTN_TOOL_PEN, 0);
 		break;
-	case HID_DG_BATTERYSTRENGTH:
-		features->quirks |= WACOM_QUIRK_BATTERY;
-		break;
 	case HID_DG_INVERT:
 		wacom_map_usage(input, usage, field, EV_KEY,
 				BTN_TOOL_RUBBER, 0);
@@ -2035,10 +2072,6 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field
 		if (!(features->quirks & WACOM_QUIRK_SENSE))
 			wacom_wac->hid_data.sense_state = value;
 		return;
-	case HID_DG_BATTERYSTRENGTH:
-		wacom_wac->hid_data.battery_capacity = value;
-		wacom_wac->hid_data.bat_connected = 1;
-		break;
 	case HID_DG_INVERT:
 		wacom_wac->hid_data.invert_state = value;
 		return;
@@ -2077,28 +2110,28 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field
 		return;
 	case WACOM_HID_WD_OFFSETLEFT:
 		if (features->offset_left && value != features->offset_left)
-			hid_warn(hdev, "%s: overriding exising left offset "
+			hid_warn(hdev, "%s: overriding existing left offset "
 				 "%d -> %d\n", __func__, value,
 				 features->offset_left);
 		features->offset_left = value;
 		return;
 	case WACOM_HID_WD_OFFSETRIGHT:
 		if (features->offset_right && value != features->offset_right)
-			hid_warn(hdev, "%s: overriding exising right offset "
+			hid_warn(hdev, "%s: overriding existing right offset "
 				 "%d -> %d\n", __func__, value,
 				 features->offset_right);
 		features->offset_right = value;
 		return;
 	case WACOM_HID_WD_OFFSETTOP:
 		if (features->offset_top && value != features->offset_top)
-			hid_warn(hdev, "%s: overriding exising top offset "
+			hid_warn(hdev, "%s: overriding existing top offset "
 				 "%d -> %d\n", __func__, value,
 				 features->offset_top);
 		features->offset_top = value;
 		return;
 	case WACOM_HID_WD_OFFSETBOTTOM:
 		if (features->offset_bottom && value != features->offset_bottom)
-			hid_warn(hdev, "%s: overriding exising bottom offset "
+			hid_warn(hdev, "%s: overriding existing bottom offset "
 				 "%d -> %d\n", __func__, value,
 				 features->offset_bottom);
 		features->offset_bottom = value;
@@ -2395,7 +2428,10 @@ void wacom_wac_usage_mapping(struct hid_device *hdev,
 	if (WACOM_DIRECT_DEVICE(field))
 		features->device_type |= WACOM_DEVICETYPE_DIRECT;
 
-	if (WACOM_PAD_FIELD(field))
+	/* usage tests must precede field tests */
+	if (WACOM_BATTERY_USAGE(usage))
+		wacom_wac_battery_usage_mapping(hdev, field, usage);
+	else if (WACOM_PAD_FIELD(field))
 		wacom_wac_pad_usage_mapping(hdev, field, usage);
 	else if (WACOM_PEN_FIELD(field))
 		wacom_wac_pen_usage_mapping(hdev, field, usage);
@@ -2414,11 +2450,12 @@ void wacom_wac_event(struct hid_device *hdev, struct hid_field *field,
 	if (value > field->logical_maximum || value < field->logical_minimum)
 		return;
 
-	if (WACOM_PAD_FIELD(field)) {
-		wacom_wac_pad_battery_event(hdev, field, usage, value);
-		if (wacom->wacom_wac.pad_input)
-			wacom_wac_pad_event(hdev, field, usage, value);
-	} else if (WACOM_PEN_FIELD(field) && wacom->wacom_wac.pen_input)
+	/* usage tests must precede field tests */
+	if (WACOM_BATTERY_USAGE(usage))
+		wacom_wac_battery_event(hdev, field, usage, value);
+	else if (WACOM_PAD_FIELD(field) && wacom->wacom_wac.pad_input)
+		wacom_wac_pad_event(hdev, field, usage, value);
+	else if (WACOM_PEN_FIELD(field) && wacom->wacom_wac.pen_input)
 		wacom_wac_pen_event(hdev, field, usage, value);
 	else if (WACOM_FINGER_FIELD(field) && wacom->wacom_wac.touch_input)
 		wacom_wac_finger_event(hdev, field, usage, value);
@@ -2452,6 +2489,8 @@ void wacom_wac_report(struct hid_device *hdev, struct hid_report *report)
 	if (wacom_wac->features.type != HID_GENERIC)
 		return;
 
+	wacom_wac_battery_pre_report(hdev, report);
+
 	if (WACOM_PAD_FIELD(field) && wacom->wacom_wac.pad_input)
 		wacom_wac_pad_pre_report(hdev, report);
 	else if (WACOM_PEN_FIELD(field) && wacom->wacom_wac.pen_input)
@@ -2471,11 +2510,11 @@ void wacom_wac_report(struct hid_device *hdev, struct hid_report *report)
 	if (report->type != HID_INPUT_REPORT)
 		return;
 
-	if (WACOM_PAD_FIELD(field)) {
-		wacom_wac_pad_battery_report(hdev, report);
-		if (wacom->wacom_wac.pad_input)
-			wacom_wac_pad_report(hdev, report);
-	} else if (WACOM_PEN_FIELD(field) && wacom->wacom_wac.pen_input)
+	wacom_wac_battery_report(hdev, report);
+
+	if (WACOM_PAD_FIELD(field) && wacom->wacom_wac.pad_input)
+		wacom_wac_pad_report(hdev, report);
+	else if (WACOM_PEN_FIELD(field) && wacom->wacom_wac.pen_input)
 		wacom_wac_pen_report(hdev, report);
 	else if (WACOM_FINGER_FIELD(field) && wacom->wacom_wac.touch_input)
 		wacom_wac_finger_report(hdev, report);
@@ -2813,13 +2852,14 @@ static int wacom_wireless_irq(struct wacom_wac *wacom, size_t len)
 			wacom_schedule_work(wacom, WACOM_WORKER_WIRELESS);
 		}
 
-		wacom_notify_battery(wacom, battery, charging, 1, 0);
+		wacom_notify_battery(wacom, WACOM_POWER_SUPPLY_STATUS_AUTO,
+				     battery, charging, 1, 0);
 
 	} else if (wacom->pid != 0) {
 		/* disconnected while previously connected */
 		wacom->pid = 0;
 		wacom_schedule_work(wacom, WACOM_WORKER_WIRELESS);
-		wacom_notify_battery(wacom, 0, 0, 0, 0);
+		wacom_notify_battery(wacom, POWER_SUPPLY_STATUS_UNKNOWN, 0, 0, 0, 0);
 	}
 
 	return 0;
@@ -2847,8 +2887,8 @@ static int wacom_status_irq(struct wacom_wac *wacom_wac, size_t len)
 		int battery = (data[8] & 0x3f) * 100 / 31;
 		bool charging = !!(data[8] & 0x80);
 
-		wacom_notify_battery(wacom_wac, battery, charging,
-				     battery || charging, 1);
+		wacom_notify_battery(wacom_wac, WACOM_POWER_SUPPLY_STATUS_AUTO,
+				     battery, charging, battery || charging, 1);
 
 		if (!wacom->battery.battery &&
 		    !(features->quirks & WACOM_QUIRK_BATTERY)) {
@@ -2860,7 +2900,7 @@ static int wacom_status_irq(struct wacom_wac *wacom_wac, size_t len)
 		 wacom->battery.battery) {
 		features->quirks &= ~WACOM_QUIRK_BATTERY;
 		wacom_schedule_work(wacom_wac, WACOM_WORKER_BATTERY);
-		wacom_notify_battery(wacom_wac, 0, 0, 0, 0);
+		wacom_notify_battery(wacom_wac, POWER_SUPPLY_STATUS_UNKNOWN, 0, 0, 0, 0);
 	}
 	return 0;
 }

diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h
index 570d295..8a03654 100644
--- a/drivers/hid/wacom_wac.h
+++ b/drivers/hid/wacom_wac.h

@@ -96,6 +96,8 @@
 #define WACOM_DEVICETYPE_WL_MONITOR     0x0008
 #define WACOM_DEVICETYPE_DIRECT         0x0010
 
+#define WACOM_POWER_SUPPLY_STATUS_AUTO  -1
+
 #define WACOM_HID_UP_WACOMDIGITIZER     0xff0d0000
 #define WACOM_HID_SP_PAD                0x00040000
 #define WACOM_HID_SP_BUTTON             0x00090000
@@ -151,6 +153,10 @@
 #define WACOM_HID_WT_X                  (WACOM_HID_UP_WACOMTOUCH | 0x130)
 #define WACOM_HID_WT_Y                  (WACOM_HID_UP_WACOMTOUCH | 0x131)
 
+/*
+ * True when the ->hid value of the given usage is one of the three battery
+ * usages (strength, charging, level). The raw ->hid value is compared.
+ */
+#define WACOM_BATTERY_USAGE(f)	(((f)->hid == HID_DG_BATTERYSTRENGTH) || \
+				 ((f)->hid == WACOM_HID_WD_BATTERY_CHARGING) || \
+				 ((f)->hid == WACOM_HID_WD_BATTERY_LEVEL))
+
 #define WACOM_PAD_FIELD(f)	(((f)->physical == HID_DG_TABLETFUNCTIONKEY) || \
 				 ((f)->physical == WACOM_HID_WD_DIGITIZERFNKEYS) || \
 				 ((f)->physical == WACOM_HID_WD_DIGITIZERINFO))
@@ -297,6 +303,7 @@ struct hid_data {
 	int last_slot_field;
 	int num_expected;
 	int num_received;
+	int bat_status;
 	int battery_capacity;
 	int bat_charging;
 	int bat_connected;

diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile
index 45095b3..7bb65a4 100644
--- a/drivers/i2c/Makefile
+++ b/drivers/i2c/Makefile

@@ -4,6 +4,11 @@
 
 obj-$(CONFIG_I2C_BOARDINFO)	+= i2c-boardinfo.o
 obj-$(CONFIG_I2C)		+= i2c-core.o
+i2c-core-objs 			:= i2c-core-base.o i2c-core-smbus.o
+i2c-core-$(CONFIG_ACPI)		+= i2c-core-acpi.o
+i2c-core-$(CONFIG_I2C_SLAVE) 	+= i2c-core-slave.o
+i2c-core-$(CONFIG_OF) 		+= i2c-core-of.o
+
 obj-$(CONFIG_I2C_SMBUS)		+= i2c-smbus.o
 obj-$(CONFIG_I2C_CHARDEV)	+= i2c-dev.o
 obj-$(CONFIG_I2C_MUX)		+= i2c-mux.o
@@ -12,4 +17,4 @@
 obj-$(CONFIG_I2C_SLAVE_EEPROM)	+= i2c-slave-eeprom.o
 
 ccflags-$(CONFIG_I2C_DEBUG_CORE) := -DDEBUG
-CFLAGS_i2c-core.o := -Wno-deprecated-declarations
+CFLAGS_i2c-core-base.o := -Wno-deprecated-declarations

diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c
index a8e89df..1147bdd 100644
--- a/drivers/i2c/algos/i2c-algo-bit.c
+++ b/drivers/i2c/algos/i2c-algo-bit.c

@@ -553,9 +553,16 @@ static int bit_xfer(struct i2c_adapter *i2c_adap,
 		nak_ok = pmsg->flags & I2C_M_IGNORE_NAK;
 		if (!(pmsg->flags & I2C_M_NOSTART)) {
 			if (i) {
-				bit_dbg(3, &i2c_adap->dev, "emitting "
-					"repeated start condition\n");
-				i2c_repstart(adap);
+				if (msgs[i - 1].flags & I2C_M_STOP) {
+					bit_dbg(3, &i2c_adap->dev,
+						"emitting enforced stop/start condition\n");
+					i2c_stop(adap);
+					i2c_start(adap);
+				} else {
+					bit_dbg(3, &i2c_adap->dev,
+						"emitting repeated start condition\n");
+					i2c_repstart(adap);
+				}
 			}
 			ret = bit_doAddress(i2c_adap, pmsg);
 			if ((ret != 0) && !nak_ok) {

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 144cbad..1006b23 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig

@@ -129,6 +129,8 @@
 	    Broxton (SOC)
 	    Lewisburg (PCH)
 	    Gemini Lake (SOC)
+	    Cannon Lake-H (PCH)
+	    Cannon Lake-LP (PCH)
 
 	  This driver can also be built as a module.  If so, the module
 	  will be called i2c-i801.
@@ -326,6 +328,16 @@
 
 comment "I2C system bus drivers (mostly embedded / system-on-chip)"
 
+config I2C_ASPEED
+	tristate "Aspeed I2C Controller"
+	depends on ARCH_ASPEED || COMPILE_TEST
+	help
+	  If you say yes to this option, support will be included for the
+	  Aspeed I2C controller.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called i2c-aspeed.
+
 config I2C_AT91
 	tristate "Atmel AT91 I2C Two-Wire interface (TWI)"
 	depends on ARCH_AT91
@@ -474,11 +486,22 @@
 	depends on (ACPI && COMMON_CLK) || !ACPI
 	help
 	  If you say yes to this option, support will be included for the
-	  Synopsys DesignWare I2C adapter. Only master mode is supported.
+	  Synopsys DesignWare I2C adapter.
 
 	  This driver can also be built as a module.  If so, the module
 	  will be called i2c-designware-platform.
 
+config I2C_DESIGNWARE_SLAVE
+	bool "Synopsys DesignWare Slave"
+	select I2C_SLAVE
+	depends on I2C_DESIGNWARE_PLATFORM
+	help
+	  If you say yes to this option, support will be included for the
+	  Synopsys DesignWare I2C slave adapter.
+
+	  This is not a standalone module; it is compiled together with
+	  i2c-designware-core.
+
 config I2C_DESIGNWARE_PCI
 	tristate "Synopsys DesignWare PCI"
 	depends on PCI
@@ -1258,4 +1281,13 @@
 	  This driver can also be built as a module. If so, the module will be
 	  called as i2c-opal.
 
+config I2C_ZX2967
+	tristate "ZTE ZX2967 I2C support"
+	depends on ARCH_ZX
+	default y
+	help
+	  Selecting this option will add ZX2967 I2C driver.
+	  This driver can also be built as a module. If so, the module will be
+	  called i2c-zx2967.
+
 endmenu

diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 30b6085..1b2fc81 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile

@@ -29,6 +29,7 @@
 obj-$(CONFIG_I2C_POWERMAC)	+= i2c-powermac.o
 
 # Embedded system I2C/SMBus host controller drivers
+obj-$(CONFIG_I2C_ASPEED)	+= i2c-aspeed.o
 obj-$(CONFIG_I2C_AT91)		+= i2c-at91.o
 obj-$(CONFIG_I2C_AU1550)	+= i2c-au1550.o
 obj-$(CONFIG_I2C_AXXIA)		+= i2c-axxia.o
@@ -40,6 +41,10 @@
 obj-$(CONFIG_I2C_CPM)		+= i2c-cpm.o
 obj-$(CONFIG_I2C_DAVINCI)	+= i2c-davinci.o
 obj-$(CONFIG_I2C_DESIGNWARE_CORE)	+= i2c-designware-core.o
+i2c-designware-core-objs := i2c-designware-common.o i2c-designware-master.o
+ifeq ($(CONFIG_I2C_DESIGNWARE_SLAVE),y)
+i2c-designware-core-objs += i2c-designware-slave.o
+endif
 obj-$(CONFIG_I2C_DESIGNWARE_PLATFORM)	+= i2c-designware-platform.o
 i2c-designware-platform-objs := i2c-designware-platdrv.o
 i2c-designware-platform-$(CONFIG_I2C_DESIGNWARE_BAYTRAIL) += i2c-designware-baytrail.o
@@ -102,6 +107,7 @@
 obj-$(CONFIG_I2C_XLR)		+= i2c-xlr.o
 obj-$(CONFIG_I2C_XLP9XX)	+= i2c-xlp9xx.o
 obj-$(CONFIG_I2C_RCAR)		+= i2c-rcar.o
+obj-$(CONFIG_I2C_ZX2967)	+= i2c-zx2967.o
 
 # External I2C/SMBus adapter drivers
 obj-$(CONFIG_I2C_DIOLAN_U2C)	+= i2c-diolan-u2c.o

diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
new file mode 100644
index 0000000..f193483
--- /dev/null
+++ b/drivers/i2c/busses/i2c-aspeed.c

@@ -0,0 +1,891 @@
+/*
+ *  Aspeed 24XX/25XX I2C Controller.
+ *
+ *  Copyright (C) 2012-2017 ASPEED Technology Inc.
+ *  Copyright 2017 IBM Corporation
+ *  Copyright 2017 Google, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+/* I2C Register */
+#define ASPEED_I2C_FUN_CTRL_REG				0x00
+#define ASPEED_I2C_AC_TIMING_REG1			0x04
+#define ASPEED_I2C_AC_TIMING_REG2			0x08
+#define ASPEED_I2C_INTR_CTRL_REG			0x0c
+#define ASPEED_I2C_INTR_STS_REG				0x10
+#define ASPEED_I2C_CMD_REG				0x14
+#define ASPEED_I2C_DEV_ADDR_REG				0x18
+#define ASPEED_I2C_BYTE_BUF_REG				0x20
+
+/* Global Register Definition */
+/* 0x00 : I2C Interrupt Status Register  */
+/* 0x08 : I2C Interrupt Target Assignment  */
+
+/* Device Register Definition */
+/* 0x00 : I2CD Function Control Register  */
+#define ASPEED_I2CD_MULTI_MASTER_DIS			BIT(15)
+#define ASPEED_I2CD_SDA_DRIVE_1T_EN			BIT(8)
+#define ASPEED_I2CD_M_SDA_DRIVE_1T_EN			BIT(7)
+#define ASPEED_I2CD_M_HIGH_SPEED_EN			BIT(6)
+#define ASPEED_I2CD_SLAVE_EN				BIT(1)
+#define ASPEED_I2CD_MASTER_EN				BIT(0)
+
+/* 0x04 : I2CD Clock and AC Timing Control Register #1 */
+#define ASPEED_I2CD_TIME_SCL_HIGH_SHIFT			16
+#define ASPEED_I2CD_TIME_SCL_HIGH_MASK			GENMASK(19, 16)
+#define ASPEED_I2CD_TIME_SCL_LOW_SHIFT			12
+#define ASPEED_I2CD_TIME_SCL_LOW_MASK			GENMASK(15, 12)
+#define ASPEED_I2CD_TIME_BASE_DIVISOR_MASK		GENMASK(3, 0)
+#define ASPEED_I2CD_TIME_SCL_REG_MAX			GENMASK(3, 0)
+/* 0x08 : I2CD Clock and AC Timing Control Register #2 */
+#define ASPEED_NO_TIMEOUT_CTRL				0
+
+/* 0x0c : I2CD Interrupt Control Register &
+ * 0x10 : I2CD Interrupt Status Register
+ *
+ * These share bit definitions, so use the same values for the enable &
+ * status bits.
+ */
+#define ASPEED_I2CD_INTR_SDA_DL_TIMEOUT			BIT(14)
+#define ASPEED_I2CD_INTR_BUS_RECOVER_DONE		BIT(13)
+#define ASPEED_I2CD_INTR_SLAVE_MATCH			BIT(7)
+#define ASPEED_I2CD_INTR_SCL_TIMEOUT			BIT(6)
+#define ASPEED_I2CD_INTR_ABNORMAL			BIT(5)
+#define ASPEED_I2CD_INTR_NORMAL_STOP			BIT(4)
+#define ASPEED_I2CD_INTR_ARBIT_LOSS			BIT(3)
+#define ASPEED_I2CD_INTR_RX_DONE			BIT(2)
+#define ASPEED_I2CD_INTR_TX_NAK				BIT(1)
+#define ASPEED_I2CD_INTR_TX_ACK				BIT(0)
+#define ASPEED_I2CD_INTR_ALL						       \
+		(ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |			       \
+		 ASPEED_I2CD_INTR_BUS_RECOVER_DONE |			       \
+		 ASPEED_I2CD_INTR_SCL_TIMEOUT |				       \
+		 ASPEED_I2CD_INTR_ABNORMAL |				       \
+		 ASPEED_I2CD_INTR_NORMAL_STOP |				       \
+		 ASPEED_I2CD_INTR_ARBIT_LOSS |				       \
+		 ASPEED_I2CD_INTR_RX_DONE |				       \
+		 ASPEED_I2CD_INTR_TX_NAK |				       \
+		 ASPEED_I2CD_INTR_TX_ACK)
+
+/* 0x14 : I2CD Command/Status Register   */
+#define ASPEED_I2CD_SCL_LINE_STS			BIT(18)
+#define ASPEED_I2CD_SDA_LINE_STS			BIT(17)
+#define ASPEED_I2CD_BUS_BUSY_STS			BIT(16)
+#define ASPEED_I2CD_BUS_RECOVER_CMD			BIT(11)
+
+/* Command Bit */
+#define ASPEED_I2CD_M_STOP_CMD				BIT(5)
+#define ASPEED_I2CD_M_S_RX_CMD_LAST			BIT(4)
+#define ASPEED_I2CD_M_RX_CMD				BIT(3)
+#define ASPEED_I2CD_S_TX_CMD				BIT(2)
+#define ASPEED_I2CD_M_TX_CMD				BIT(1)
+#define ASPEED_I2CD_M_START_CMD				BIT(0)
+
+/* 0x18 : I2CD Slave Device Address Register   */
+#define ASPEED_I2CD_DEV_ADDR_MASK			GENMASK(6, 0)
+
+/*
+ * Master transfer state, kept in aspeed_i2c_bus.master_state and advanced by
+ * the master interrupt handler.
+ */
+enum aspeed_i2c_master_state {
+	ASPEED_I2C_MASTER_START,	/* set by aspeed_i2c_do_start() */
+	ASPEED_I2C_MASTER_TX_FIRST,	/* address ACKed, write message */
+	ASPEED_I2C_MASTER_TX,
+	ASPEED_I2C_MASTER_RX_FIRST,	/* address ACKed, read message */
+	ASPEED_I2C_MASTER_RX,
+	ASPEED_I2C_MASTER_STOP,		/* set by aspeed_i2c_do_stop() */
+	ASPEED_I2C_MASTER_INACTIVE,
+};
+
+/*
+ * Slave-side state, kept in aspeed_i2c_bus.slave_state and driven by
+ * aspeed_i2c_slave_irq().
+ */
+enum aspeed_i2c_slave_state {
+	ASPEED_I2C_SLAVE_START,			/* slave address matched */
+	ASPEED_I2C_SLAVE_READ_REQUESTED,	/* address byte had bit 0 set */
+	ASPEED_I2C_SLAVE_READ_PROCESSED,
+	ASPEED_I2C_SLAVE_WRITE_REQUESTED,	/* address byte had bit 0 clear */
+	ASPEED_I2C_SLAVE_WRITE_RECEIVED,
+	ASPEED_I2C_SLAVE_STOP,			/* stop or TX NAK seen */
+};
+
+/* Per-bus state of the Aspeed I2C controller driver. */
+struct aspeed_i2c_bus {
+	struct i2c_adapter		adap;
+	struct device			*dev;
+	void __iomem			*base;
+	/* Synchronizes I/O mem access to base. */
+	spinlock_t			lock;
+	/* Waited on (bounded by adap.timeout) by aspeed_i2c_recover_bus(). */
+	struct completion		cmd_complete;
+	unsigned long			parent_clk_frequency;
+	u32				bus_frequency;
+	/* Transaction state. */
+	enum aspeed_i2c_master_state	master_state;
+	struct i2c_msg			*msgs;
+	/* Position of the next byte within the current message's buffer. */
+	size_t				buf_index;
+	/* Index of the current message in msgs; msgs_count bounds it. */
+	size_t				msgs_index;
+	size_t				msgs_count;
+	bool				send_stop;
+	/* Error code recorded by the master interrupt handler. */
+	int				cmd_err;
+	/* Protected only by i2c_lock_bus */
+	int				master_xfer_result;
+#if IS_ENABLED(CONFIG_I2C_SLAVE)
+	/* Registered slave client; aspeed_i2c_slave_irq() bails out if NULL. */
+	struct i2c_client		*slave;
+	enum aspeed_i2c_slave_state	slave_state;
+#endif /* CONFIG_I2C_SLAVE */
+};
+
+static int aspeed_i2c_reset(struct aspeed_i2c_bus *bus);
+
+/*
+ * Attempt to bring a stuck bus back to the idle state.
+ *
+ * The SDA/SCL line status bits of the command register decide what is done:
+ *  - SDA and SCL both high: nothing to do;
+ *  - SDA high, SCL low: issue a master STOP command;
+ *  - SDA low: issue the controller's bus recovery command.
+ * After issuing a command the function waits, with the lock released, for
+ * cmd_complete or until adap.timeout expires. A timeout, a recorded
+ * bus->cmd_err, or the hung line still reading low afterwards hands over to
+ * aspeed_i2c_reset().
+ *
+ * Return: 0 when the bus is idle or was recovered, otherwise the result of
+ * aspeed_i2c_reset().
+ */
+static int aspeed_i2c_recover_bus(struct aspeed_i2c_bus *bus)
+{
+	unsigned long time_left, flags;
+	int ret = 0;
+	u32 command;
+
+	spin_lock_irqsave(&bus->lock, flags);
+	command = readl(bus->base + ASPEED_I2C_CMD_REG);
+
+	if (command & ASPEED_I2CD_SDA_LINE_STS) {
+		/* Bus is idle: no recovery needed. */
+		if (command & ASPEED_I2CD_SCL_LINE_STS)
+			goto out;
+		dev_dbg(bus->dev, "SCL hung (state %x), attempting recovery\n",
+			command);
+
+		reinit_completion(&bus->cmd_complete);
+		writel(ASPEED_I2CD_M_STOP_CMD, bus->base + ASPEED_I2C_CMD_REG);
+		/* Drop the lock while waiting for the completion. */
+		spin_unlock_irqrestore(&bus->lock, flags);
+
+		time_left = wait_for_completion_timeout(
+				&bus->cmd_complete, bus->adap.timeout);
+
+		/* Re-take the lock; every exit path below unlocks it again. */
+		spin_lock_irqsave(&bus->lock, flags);
+		if (time_left == 0)
+			goto reset_out;
+		else if (bus->cmd_err)
+			goto reset_out;
+		/* Recovery failed. */
+		else if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
+			   ASPEED_I2CD_SCL_LINE_STS))
+			goto reset_out;
+	/* Bus error. */
+	} else {
+		dev_dbg(bus->dev, "SDA hung (state %x), attempting recovery\n",
+			command);
+
+		reinit_completion(&bus->cmd_complete);
+		/* Writes 1 to 8 SCL clock cycles until SDA is released. */
+		writel(ASPEED_I2CD_BUS_RECOVER_CMD,
+		       bus->base + ASPEED_I2C_CMD_REG);
+		/* Drop the lock while waiting for the completion. */
+		spin_unlock_irqrestore(&bus->lock, flags);
+
+		time_left = wait_for_completion_timeout(
+				&bus->cmd_complete, bus->adap.timeout);
+
+		/* Re-take the lock; every exit path below unlocks it again. */
+		spin_lock_irqsave(&bus->lock, flags);
+		if (time_left == 0)
+			goto reset_out;
+		else if (bus->cmd_err)
+			goto reset_out;
+		/* Recovery failed. */
+		else if (!(readl(bus->base + ASPEED_I2C_CMD_REG) &
+			   ASPEED_I2CD_SDA_LINE_STS))
+			goto reset_out;
+	}
+
+out:
+	spin_unlock_irqrestore(&bus->lock, flags);
+
+	return ret;
+
+reset_out:
+	/* The reset is called without bus->lock held. */
+	spin_unlock_irqrestore(&bus->lock, flags);
+
+	return aspeed_i2c_reset(bus);
+}
+
+#if IS_ENABLED(CONFIG_I2C_SLAVE)
+/*
+ * Slave-mode part of interrupt handling.
+ *
+ * Reads the command and interrupt status registers, advances
+ * bus->slave_state, delivers the matching event to the registered slave
+ * through i2c_slave_event(), and finally writes the status bits it dealt
+ * with back to the interrupt status register.
+ *
+ * Return: true when the interrupt was treated as a slave event; false when
+ * no slave is registered, or when the slave state machine is in the STOP
+ * state and this interrupt is not a new address match.
+ */
+static bool aspeed_i2c_slave_irq(struct aspeed_i2c_bus *bus)
+{
+	u32 command, irq_status, status_ack = 0;
+	/* NOTE(review): bus->slave is sampled before bus->lock is taken. */
+	struct i2c_client *slave = bus->slave;
+	bool irq_handled = true;
+	u8 value;
+
+	spin_lock(&bus->lock);
+	if (!slave) {
+		irq_handled = false;
+		goto out;
+	}
+
+	command = readl(bus->base + ASPEED_I2C_CMD_REG);
+	irq_status = readl(bus->base + ASPEED_I2C_INTR_STS_REG);
+
+	/* Slave was requested, restart state machine. */
+	if (irq_status & ASPEED_I2CD_INTR_SLAVE_MATCH) {
+		status_ack |= ASPEED_I2CD_INTR_SLAVE_MATCH;
+		bus->slave_state = ASPEED_I2C_SLAVE_START;
+	}
+
+	/* Slave is not currently active, irq was for someone else. */
+	if (bus->slave_state == ASPEED_I2C_SLAVE_STOP) {
+		irq_handled = false;
+		goto out;
+	}
+
+	dev_dbg(bus->dev, "slave irq status 0x%08x, cmd 0x%08x\n",
+		irq_status, command);
+
+	/* Slave was sent something. */
+	if (irq_status & ASPEED_I2CD_INTR_RX_DONE) {
+		/* The received byte is taken from bits 15:8 of the register. */
+		value = readl(bus->base + ASPEED_I2C_BYTE_BUF_REG) >> 8;
+		/* Handle address frame. */
+		if (bus->slave_state == ASPEED_I2C_SLAVE_START) {
+			/* Bit 0 of the address byte selects read vs. write. */
+			if (value & 0x1)
+				bus->slave_state =
+						ASPEED_I2C_SLAVE_READ_REQUESTED;
+			else
+				bus->slave_state =
+						ASPEED_I2C_SLAVE_WRITE_REQUESTED;
+		}
+		status_ack |= ASPEED_I2CD_INTR_RX_DONE;
+	}
+
+	/* Slave was asked to stop. */
+	if (irq_status & ASPEED_I2CD_INTR_NORMAL_STOP) {
+		status_ack |= ASPEED_I2CD_INTR_NORMAL_STOP;
+		bus->slave_state = ASPEED_I2C_SLAVE_STOP;
+	}
+	/* A NAK on a transmitted byte also ends the transaction. */
+	if (irq_status & ASPEED_I2CD_INTR_TX_NAK) {
+		status_ack |= ASPEED_I2CD_INTR_TX_NAK;
+		bus->slave_state = ASPEED_I2C_SLAVE_STOP;
+	}
+
+	/*
+	 * Deliver the event for the state reached above.
+	 * NOTE(review): value is only assigned in this function when RX_DONE
+	 * was set; for the other events it is presumably filled in or
+	 * ignored by the slave backend - confirm against i2c_slave_event()
+	 * users.
+	 */
+	switch (bus->slave_state) {
+	case ASPEED_I2C_SLAVE_READ_REQUESTED:
+		if (irq_status & ASPEED_I2CD_INTR_TX_ACK)
+			dev_err(bus->dev, "Unexpected ACK on read request.\n");
+		bus->slave_state = ASPEED_I2C_SLAVE_READ_PROCESSED;
+
+		/* Queue the byte left in value by the event for transmit. */
+		i2c_slave_event(slave, I2C_SLAVE_READ_REQUESTED, &value);
+		writel(value, bus->base + ASPEED_I2C_BYTE_BUF_REG);
+		writel(ASPEED_I2CD_S_TX_CMD, bus->base + ASPEED_I2C_CMD_REG);
+		break;
+	case ASPEED_I2C_SLAVE_READ_PROCESSED:
+		status_ack |= ASPEED_I2CD_INTR_TX_ACK;
+		if (!(irq_status & ASPEED_I2CD_INTR_TX_ACK))
+			dev_err(bus->dev,
+				"Expected ACK after processed read.\n");
+		i2c_slave_event(slave, I2C_SLAVE_READ_PROCESSED, &value);
+		writel(value, bus->base + ASPEED_I2C_BYTE_BUF_REG);
+		writel(ASPEED_I2CD_S_TX_CMD, bus->base + ASPEED_I2C_CMD_REG);
+		break;
+	case ASPEED_I2C_SLAVE_WRITE_REQUESTED:
+		bus->slave_state = ASPEED_I2C_SLAVE_WRITE_RECEIVED;
+		i2c_slave_event(slave, I2C_SLAVE_WRITE_REQUESTED, &value);
+		break;
+	case ASPEED_I2C_SLAVE_WRITE_RECEIVED:
+		i2c_slave_event(slave, I2C_SLAVE_WRITE_RECEIVED, &value);
+		break;
+	case ASPEED_I2C_SLAVE_STOP:
+		i2c_slave_event(slave, I2C_SLAVE_STOP, &value);
+		break;
+	default:
+		dev_err(bus->dev, "unhandled slave_state: %d\n",
+			bus->slave_state);
+		break;
+	}
+
+	/* Complain when a raised status bit was not consumed above. */
+	if (status_ack != irq_status)
+		dev_err(bus->dev,
+			"irq handled != irq. expected %x, but was %x\n",
+			irq_status, status_ack);
+	/* Only the bits collected in status_ack are written back. */
+	writel(status_ack, bus->base + ASPEED_I2C_INTR_STS_REG);
+
+out:
+	spin_unlock(&bus->lock);
+	return irq_handled;
+}
+#endif /* CONFIG_I2C_SLAVE */
+
+/*
+ * Queue the START plus address byte for the message at bus->msgs_index and
+ * reset the per-message byte counter. For a read message the first RX
+ * command is queued together with the address.
+ *
+ * precondition: bus.lock has been acquired.
+ */
+static void aspeed_i2c_do_start(struct aspeed_i2c_bus *bus)
+{
+	struct i2c_msg *cur = &bus->msgs[bus->msgs_index];
+	u8 addr_byte = cur->addr << 1;
+	u32 cmd = ASPEED_I2CD_M_START_CMD | ASPEED_I2CD_M_TX_CMD;
+
+	if (cur->flags & I2C_M_RD) {
+		/* Read transfers set the R/W bit of the address byte. */
+		addr_byte |= 1;
+		cmd |= ASPEED_I2CD_M_RX_CMD;
+		/* Need to let the hardware know to NACK after RX. */
+		if (cur->len == 1 && !(cur->flags & I2C_M_RECV_LEN))
+			cmd |= ASPEED_I2CD_M_S_RX_CMD_LAST;
+	}
+
+	bus->buf_index = 0;
+	bus->master_state = ASPEED_I2C_MASTER_START;
+
+	writel(addr_byte, bus->base + ASPEED_I2C_BYTE_BUF_REG);
+	writel(cmd, bus->base + ASPEED_I2C_CMD_REG);
+}
+
+/*
+ * Move the master state machine to STOP and issue the STOP command.
+ *
+ * precondition: bus.lock has been acquired.
+ */
+static void aspeed_i2c_do_stop(struct aspeed_i2c_bus *bus)
+{
+	void __iomem *cmd_reg = bus->base + ASPEED_I2C_CMD_REG;
+
+	bus->master_state = ASPEED_I2C_MASTER_STOP;
+	writel(ASPEED_I2CD_M_STOP_CMD, cmd_reg);
+}
+
+/*
+ * Advance to the next queued message and start it, or issue a STOP when
+ * the current message is the last one.
+ *
+ * precondition: bus.lock has been acquired.
+ */
+static void aspeed_i2c_next_msg_or_stop(struct aspeed_i2c_bus *bus)
+{
+	bool is_last = bus->msgs_index + 1 >= bus->msgs_count;
+
+	if (is_last) {
+		aspeed_i2c_do_stop(bus);
+		return;
+	}
+
+	bus->msgs_index++;
+	aspeed_i2c_do_start(bus);
+}
+
+/*
+ * Translate the error bits of an interrupt status word into an errno.
+ * Arbitration loss is checked first, then the SDA/SCL timeouts, then the
+ * abnormal-condition bit. Returns 0 when none of these bits is set.
+ */
+static int aspeed_i2c_is_irq_error(u32 irq_status)
+{
+	const u32 timeout_bits = ASPEED_I2CD_INTR_SDA_DL_TIMEOUT |
+				 ASPEED_I2CD_INTR_SCL_TIMEOUT;
+	int err = 0;
+
+	if (irq_status & ASPEED_I2CD_INTR_ARBIT_LOSS)
+		err = -EAGAIN;
+	else if (irq_status & timeout_bits)
+		err = -EBUSY;
+	else if (irq_status & ASPEED_I2CD_INTR_ABNORMAL)
+		err = -EPROTO;
+
+	return err;
+}
+
+static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
+{
+	u32 irq_status, status_ack = 0, command = 0;
+	struct i2c_msg *msg;
+	u8 recv_byte;
+	int ret;
+
+	spin_lock(&bus->lock);
+	irq_status = readl(bus->base + ASPEED_I2C_INTR_STS_REG);
+	/* Ack all interrupt bits. */
+	writel(irq_status, bus->base + ASPEED_I2C_INTR_STS_REG);
+
+	if (irq_status & ASPEED_I2CD_INTR_BUS_RECOVER_DONE) {
+		bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
+		status_ack |= ASPEED_I2CD_INTR_BUS_RECOVER_DONE;
+		goto out_complete;
+	}
+
+	/*
+	 * We encountered an interrupt that reports an error: the hardware
+	 * should clear the command queue effectively taking us back to the
+	 * INACTIVE state.
+	 */
+	ret = aspeed_i2c_is_irq_error(irq_status);
+	if (ret < 0) {
+		dev_dbg(bus->dev, "received error interrupt: 0x%08x",
+			irq_status);
+		bus->cmd_err = ret;
+		bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
+		goto out_complete;
+	}
+
+	/* We are in an invalid state; reset bus to a known state. */
+	if (!bus->msgs && bus->master_state != ASPEED_I2C_MASTER_STOP) {
+		dev_err(bus->dev, "bus in unknown state");
+		bus->cmd_err = -EIO;
+		aspeed_i2c_do_stop(bus);
+		goto out_no_complete;
+	}
+	msg = &bus->msgs[bus->msgs_index];
+
+	/*
+	 * START is a special case because we still have to handle a subsequent
+	 * TX or RX immediately after we handle it, so we handle it here and
+	 * then update the state and handle the new state below.
+	 */
+	if (bus->master_state == ASPEED_I2C_MASTER_START) {
+		if (unlikely(!(irq_status & ASPEED_I2CD_INTR_TX_ACK))) {
+			pr_devel("no slave present at %02x", msg->addr);
+			status_ack |= ASPEED_I2CD_INTR_TX_NAK;
+			bus->cmd_err = -ENXIO;
+			aspeed_i2c_do_stop(bus);
+			goto out_no_complete;
+		}
+		status_ack |= ASPEED_I2CD_INTR_TX_ACK;
+		if (msg->len == 0) { /* SMBUS_QUICK */
+			aspeed_i2c_do_stop(bus);
+			goto out_no_complete;
+		}
+		if (msg->flags & I2C_M_RD)
+			bus->master_state = ASPEED_I2C_MASTER_RX_FIRST;
+		else
+			bus->master_state = ASPEED_I2C_MASTER_TX_FIRST;
+	}
+
+	switch (bus->master_state) {
+	case ASPEED_I2C_MASTER_TX:
+		if (unlikely(irq_status & ASPEED_I2CD_INTR_TX_NAK)) {
+			dev_dbg(bus->dev, "slave NACKed TX");
+			status_ack |= ASPEED_I2CD_INTR_TX_NAK;
+			goto error_and_stop;
+		} else if (unlikely(!(irq_status & ASPEED_I2CD_INTR_TX_ACK))) {
+			dev_err(bus->dev, "slave failed to ACK TX");
+			goto error_and_stop;
+		}
+		status_ack |= ASPEED_I2CD_INTR_TX_ACK;
+		/* fallthrough intended */
+	case ASPEED_I2C_MASTER_TX_FIRST:
+		if (bus->buf_index < msg->len) {
+			bus->master_state = ASPEED_I2C_MASTER_TX;
+			writel(msg->buf[bus->buf_index++],
+			       bus->base + ASPEED_I2C_BYTE_BUF_REG);
+			writel(ASPEED_I2CD_M_TX_CMD,
+			       bus->base + ASPEED_I2C_CMD_REG);
+		} else {
+			aspeed_i2c_next_msg_or_stop(bus);
+		}
+		goto out_no_complete;
+	case ASPEED_I2C_MASTER_RX_FIRST:
+		/* RX may not have completed yet (only address cycle) */
+		if (!(irq_status & ASPEED_I2CD_INTR_RX_DONE))
+			goto out_no_complete;
+		/* fallthrough intended */
+	case ASPEED_I2C_MASTER_RX:
+		if (unlikely(!(irq_status & ASPEED_I2CD_INTR_RX_DONE))) {
+			dev_err(bus->dev, "master failed to RX");
+			goto error_and_stop;
+		}
+		status_ack |= ASPEED_I2CD_INTR_RX_DONE;
+
+		recv_byte = readl(bus->base + ASPEED_I2C_BYTE_BUF_REG) >> 8;
+		msg->buf[bus->buf_index++] = recv_byte;
+
+		if (msg->flags & I2C_M_RECV_LEN) {
+			if (unlikely(recv_byte > I2C_SMBUS_BLOCK_MAX)) {
+				bus->cmd_err = -EPROTO;
+				aspeed_i2c_do_stop(bus);
+				goto out_no_complete;
+			}
+			msg->len = recv_byte +
+					((msg->flags & I2C_CLIENT_PEC) ? 2 : 1);
+			msg->flags &= ~I2C_M_RECV_LEN;
+		}
+
+		if (bus->buf_index < msg->len) {
+			bus->master_state = ASPEED_I2C_MASTER_RX;
+			command = ASPEED_I2CD_M_RX_CMD;
+			if (bus->buf_index + 1 == msg->len)
+				command |= ASPEED_I2CD_M_S_RX_CMD_LAST;
+			writel(command, bus->base + ASPEED_I2C_CMD_REG);
+		} else {
+			aspeed_i2c_next_msg_or_stop(bus);
+		}
+		goto out_no_complete;
+	case ASPEED_I2C_MASTER_STOP:
+		if (unlikely(!(irq_status & ASPEED_I2CD_INTR_NORMAL_STOP))) {
+			dev_err(bus->dev, "master failed to STOP");
+			bus->cmd_err = -EIO;
+			/* Do not STOP as we have already tried. */
+		} else {
+			status_ack |= ASPEED_I2CD_INTR_NORMAL_STOP;
+		}
+
+		bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
+		goto out_complete;
+	case ASPEED_I2C_MASTER_INACTIVE:
+		dev_err(bus->dev,
+			"master received interrupt 0x%08x, but is inactive",
+			irq_status);
+		bus->cmd_err = -EIO;
+		/* Do not STOP as we should be inactive. */
+		goto out_complete;
+	default:
+		WARN(1, "unknown master state\n");
+		bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
+		bus->cmd_err = -EINVAL;
+		goto out_complete;
+	}
+error_and_stop:
+	bus->cmd_err = -EIO;
+	aspeed_i2c_do_stop(bus);
+	goto out_no_complete;
+out_complete:
+	bus->msgs = NULL;
+	if (bus->cmd_err)
+		bus->master_xfer_result = bus->cmd_err;
+	else
+		bus->master_xfer_result = bus->msgs_index + 1;
+	complete(&bus->cmd_complete);
+out_no_complete:
+	if (irq_status != status_ack)
+		dev_err(bus->dev,
+			"irq handled != irq. expected 0x%08x, but was 0x%08x\n",
+			irq_status, status_ack);
+	spin_unlock(&bus->lock);
+	return !!irq_status;
+}
+
+/*
+ * Top-level interrupt handler for one bus. When slave support is built in,
+ * the slave handler gets the first look; the master handler only runs if the
+ * slave handler did not claim the interrupt.
+ */
+static irqreturn_t aspeed_i2c_bus_irq(int irq, void *dev_id)
+{
+	struct aspeed_i2c_bus *bus = dev_id;
+
+#if IS_ENABLED(CONFIG_I2C_SLAVE)
+	if (aspeed_i2c_slave_irq(bus)) {
+		dev_dbg(bus->dev, "irq handled by slave.\n");
+		return IRQ_HANDLED;
+	}
+#endif /* CONFIG_I2C_SLAVE */
+
+	if (aspeed_i2c_master_irq(bus))
+		return IRQ_HANDLED;
+
+	return IRQ_NONE;
+}
+
+/*
+ * Run a master transfer of @num messages and block until it finishes.
+ *
+ * If the controller reports the bus as busy, bus recovery is attempted
+ * first (a single master environment is assumed). The transfer itself is
+ * driven from the interrupt handler; this function only kicks it off and
+ * waits for the completion.
+ *
+ * Returns the value the interrupt handler stored in
+ * bus->master_xfer_result, the error returned by bus recovery, or
+ * -ETIMEDOUT if the transfer did not complete within the adapter timeout.
+ */
+static int aspeed_i2c_master_xfer(struct i2c_adapter *adap,
+				  struct i2c_msg *msgs, int num)
+{
+	struct aspeed_i2c_bus *bus = i2c_get_adapdata(adap);
+	unsigned long time_left, flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&bus->lock, flags);
+	bus->cmd_err = 0;
+
+	/* If bus is busy, attempt recovery. We assume a single master
+	 * environment.
+	 */
+	if (readl(bus->base + ASPEED_I2C_CMD_REG) & ASPEED_I2CD_BUS_BUSY_STS) {
+		/* Recovery is called without the lock held. */
+		spin_unlock_irqrestore(&bus->lock, flags);
+		ret = aspeed_i2c_recover_bus(bus);
+		if (ret)
+			return ret;
+		spin_lock_irqsave(&bus->lock, flags);
+	}
+
+	bus->cmd_err = 0;
+	bus->msgs = msgs;
+	bus->msgs_index = 0;
+	bus->msgs_count = num;
+
+	reinit_completion(&bus->cmd_complete);
+	aspeed_i2c_do_start(bus);
+	spin_unlock_irqrestore(&bus->lock, flags);
+
+	time_left = wait_for_completion_timeout(&bus->cmd_complete,
+						bus->adap.timeout);
+
+	if (time_left == 0) {
+		/*
+		 * @msgs belongs to the caller and may go away as soon as we
+		 * return. Drop our reference so that a late interrupt hits the
+		 * "bus in unknown state" check in the master interrupt handler
+		 * instead of touching a stale buffer.
+		 */
+		spin_lock_irqsave(&bus->lock, flags);
+		bus->msgs = NULL;
+		spin_unlock_irqrestore(&bus->lock, flags);
+
+		return -ETIMEDOUT;
+	}
+
+	return bus->master_xfer_result;
+}
+
+/* Report the I2C/SMBus feature flags this adapter advertises. */
+static u32 aspeed_i2c_functionality(struct i2c_adapter *adap)
+{
+	u32 func = I2C_FUNC_I2C;
+
+	func |= I2C_FUNC_SMBUS_EMUL;
+	func |= I2C_FUNC_SMBUS_BLOCK_DATA;
+
+	return func;
+}
+
+#if IS_ENABLED(CONFIG_I2C_SLAVE)
+/*
+ * Program @slave_addr into the device address register and set the slave
+ * enable bit in the function control register.
+ *
+ * precondition: bus.lock has been acquired.
+ */
+static void __aspeed_i2c_reg_slave(struct aspeed_i2c_bus *bus, u16 slave_addr)
+{
+	u32 val;
+
+	/* Replace only the address field; keep the remaining bits. */
+	val = readl(bus->base + ASPEED_I2C_DEV_ADDR_REG);
+	val &= ~ASPEED_I2CD_DEV_ADDR_MASK;
+	val |= slave_addr & ASPEED_I2CD_DEV_ADDR_MASK;
+	writel(val, bus->base + ASPEED_I2C_DEV_ADDR_REG);
+
+	/* Turn on slave mode. */
+	val = readl(bus->base + ASPEED_I2C_FUN_CTRL_REG);
+	val |= ASPEED_I2CD_SLAVE_EN;
+	writel(val, bus->base + ASPEED_I2C_FUN_CTRL_REG);
+}
+
+/*
+ * Register @client as the slave device served by its adapter.
+ *
+ * Returns 0 on success, or -EINVAL if a slave is already registered on
+ * this bus.
+ */
+static int aspeed_i2c_reg_slave(struct i2c_client *client)
+{
+	struct aspeed_i2c_bus *bus = i2c_get_adapdata(client->adapter);
+	unsigned long flags;
+	int ret = -EINVAL;
+
+	spin_lock_irqsave(&bus->lock, flags);
+	if (!bus->slave) {
+		__aspeed_i2c_reg_slave(bus, client->addr);
+
+		bus->slave = client;
+		bus->slave_state = ASPEED_I2C_SLAVE_STOP;
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&bus->lock, flags);
+
+	return ret;
+}
+
+/*
+ * Unregister the slave device of @client's adapter and clear the slave
+ * enable bit.
+ *
+ * Returns 0 on success, or -EINVAL if no slave is currently registered.
+ */
+static int aspeed_i2c_unreg_slave(struct i2c_client *client)
+{
+	struct aspeed_i2c_bus *bus = i2c_get_adapdata(client->adapter);
+	unsigned long flags;
+	int ret = -EINVAL;
+	u32 ctrl;
+
+	spin_lock_irqsave(&bus->lock, flags);
+	if (bus->slave) {
+		/* Turn off slave mode. */
+		ctrl = readl(bus->base + ASPEED_I2C_FUN_CTRL_REG);
+		ctrl &= ~ASPEED_I2CD_SLAVE_EN;
+		writel(ctrl, bus->base + ASPEED_I2C_FUN_CTRL_REG);
+
+		bus->slave = NULL;
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&bus->lock, flags);
+
+	return ret;
+}
+#endif /* CONFIG_I2C_SLAVE */
+
+/*
+ * Algorithm callbacks installed on the adapter in the probe routine. The
+ * slave hooks are only provided when CONFIG_I2C_SLAVE is enabled.
+ */
+static const struct i2c_algorithm aspeed_i2c_algo = {
+	.master_xfer	= aspeed_i2c_master_xfer,
+	.functionality	= aspeed_i2c_functionality,
+#if IS_ENABLED(CONFIG_I2C_SLAVE)
+	.reg_slave	= aspeed_i2c_reg_slave,
+	.unreg_slave	= aspeed_i2c_unreg_slave,
+#endif /* CONFIG_I2C_SLAVE */
+};
+
+/*
+ * Translate an APB-to-SCL clock divisor into the value written to AC timing
+ * register 1.
+ *
+ * The SCL frequency produced by the controller is
+ *	SCL_freq = APB_freq / (base_freq * (SCL_high + SCL_low))
+ *		 = APB_freq / divisor
+ * where base_freq = 1 << base_clk is a programmable pre-divider, and
+ * SCL_high / SCL_low are the number of base_freq cycles SCL spends high and
+ * low within one SCL period.
+ *
+ * The register fields count from zero (the minimum for each phase is one
+ * cycle), i.e.
+ *	SCL_high = clk_high + 1
+ *	SCL_low	 = clk_low + 1
+ * so
+ *	SCL_freq = APB_freq /
+ *		((1 << base_clk) * (clk_high + 1 + clk_low + 1))
+ *
+ * The documentation recommends clk_high >= 8 and clk_low >= 7 when
+ * possible; the choice of base_clk below follows from that constraint.
+ */
+static u32 aspeed_i2c_get_clk_reg_val(u32 divisor)
+{
+	u32 base_clk = 0;
+	u32 cycles, high, low, reg;
+
+	if (divisor > 33)
+		base_clk = ilog2((divisor - 1) / 32) + 1;
+
+	/* base_freq cycles per SCL period; an odd cycle goes to the high phase */
+	cycles = divisor >> base_clk;
+	high = cycles / 2 + cycles % 2;
+	low = cycles - high;
+
+	/* The register fields hold (cycles - 1). */
+	high--;
+	low--;
+
+	reg = (high << ASPEED_I2CD_TIME_SCL_HIGH_SHIFT)
+		& ASPEED_I2CD_TIME_SCL_HIGH_MASK;
+	reg |= (low << ASPEED_I2CD_TIME_SCL_LOW_SHIFT)
+		& ASPEED_I2CD_TIME_SCL_LOW_MASK;
+	reg |= base_clk & ASPEED_I2CD_TIME_BASE_DIVISOR_MASK;
+
+	return reg;
+}
+
+/*
+ * Program the AC timing registers from the parent clock rate and the
+ * requested bus frequency.
+ *
+ * Returns 0 on success, or -EINVAL if the requested bus frequency is zero
+ * (which would otherwise cause a division by zero).
+ *
+ * precondition: bus.lock has been acquired.
+ */
+static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus)
+{
+	u32 divisor, clk_reg_val;
+
+	/* The frequency comes from the device tree and may be bogus. */
+	if (!bus->bus_frequency)
+		return -EINVAL;
+
+	divisor = bus->parent_clk_frequency / bus->bus_frequency;
+	clk_reg_val = aspeed_i2c_get_clk_reg_val(divisor);
+	writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1);
+	writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2);
+
+	return 0;
+}
+
+/*
+ * Bring the controller into its operating configuration: disable all
+ * functions, program the clock, enable master mode (with multi-master
+ * disabled unless the "multi-master" property is present), re-enable a
+ * previously registered slave and finally enable interrupt generation.
+ *
+ * Returns 0 on success or the negative error from clock setup.
+ *
+ * precondition: bus.lock has been acquired.
+ */
+static int aspeed_i2c_init(struct aspeed_i2c_bus *bus,
+			     struct platform_device *pdev)
+{
+	u32 enable_bits = ASPEED_I2CD_MASTER_EN;
+	u32 ctrl;
+	int err;
+
+	/* Disable everything. */
+	writel(0, bus->base + ASPEED_I2C_FUN_CTRL_REG);
+
+	err = aspeed_i2c_init_clk(bus);
+	if (err < 0)
+		return err;
+
+	if (!of_property_read_bool(pdev->dev.of_node, "multi-master"))
+		enable_bits |= ASPEED_I2CD_MULTI_MASTER_DIS;
+
+	/* Enable Master Mode */
+	ctrl = readl(bus->base + ASPEED_I2C_FUN_CTRL_REG);
+	writel(ctrl | enable_bits, bus->base + ASPEED_I2C_FUN_CTRL_REG);
+
+#if IS_ENABLED(CONFIG_I2C_SLAVE)
+	/* If slave has already been registered, re-enable it. */
+	if (bus->slave)
+		__aspeed_i2c_reg_slave(bus, bus->slave->addr);
+#endif /* CONFIG_I2C_SLAVE */
+
+	/* Set interrupt generation of I2C controller */
+	writel(ASPEED_I2CD_INTR_ALL, bus->base + ASPEED_I2C_INTR_CTRL_REG);
+
+	return 0;
+}
+
+/*
+ * Reset the controller: with the bus lock held, mask and acknowledge every
+ * interrupt and then run the normal initialisation sequence again.
+ *
+ * Returns the result of aspeed_i2c_init().
+ */
+static int aspeed_i2c_reset(struct aspeed_i2c_bus *bus)
+{
+	unsigned long flags;
+	int err;
+
+	spin_lock_irqsave(&bus->lock, flags);
+
+	/* Disable and ack all interrupts. */
+	writel(0, bus->base + ASPEED_I2C_INTR_CTRL_REG);
+	writel(0xffffffff, bus->base + ASPEED_I2C_INTR_STS_REG);
+
+	err = aspeed_i2c_init(bus, to_platform_device(bus->dev));
+
+	spin_unlock_irqrestore(&bus->lock, flags);
+
+	return err;
+}
+
+/*
+ * Probe one I2C bus: map its registers, read the parent clock rate and the
+ * "bus-frequency" property (falling back to 100 kHz), initialise the
+ * controller, hook up the interrupt and register the adapter with the I2C
+ * core.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int aspeed_i2c_probe_bus(struct platform_device *pdev)
+{
+	struct aspeed_i2c_bus *bus;
+	struct clk *parent_clk;
+	struct resource *res;
+	int irq, ret;
+
+	bus = devm_kzalloc(&pdev->dev, sizeof(*bus), GFP_KERNEL);
+	if (!bus)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	bus->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(bus->base))
+		return PTR_ERR(bus->base);
+
+	parent_clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(parent_clk))
+		return PTR_ERR(parent_clk);
+	bus->parent_clk_frequency = clk_get_rate(parent_clk);
+	/* We just need the clock rate, we don't actually use the clk object. */
+	devm_clk_put(&pdev->dev, parent_clk);
+
+	ret = of_property_read_u32(pdev->dev.of_node,
+				   "bus-frequency", &bus->bus_frequency);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"Could not read bus-frequency property\n");
+		/* Fall back to standard mode rather than failing the probe. */
+		bus->bus_frequency = 100000;
+	}
+
+	/* Initialize the I2C adapter */
+	spin_lock_init(&bus->lock);
+	init_completion(&bus->cmd_complete);
+	bus->adap.owner = THIS_MODULE;
+	bus->adap.retries = 0;
+	bus->adap.timeout = 5 * HZ;
+	bus->adap.algo = &aspeed_i2c_algo;
+	bus->adap.dev.parent = &pdev->dev;
+	bus->adap.dev.of_node = pdev->dev.of_node;
+	strlcpy(bus->adap.name, pdev->name, sizeof(bus->adap.name));
+	i2c_set_adapdata(&bus->adap, bus);
+
+	bus->dev = &pdev->dev;
+
+	/* Clean up any left over interrupt state. */
+	writel(0, bus->base + ASPEED_I2C_INTR_CTRL_REG);
+	writel(0xffffffff, bus->base + ASPEED_I2C_INTR_STS_REG);
+	/*
+	 * bus.lock does not need to be held because the interrupt handler has
+	 * not been enabled yet.
+	 */
+	ret = aspeed_i2c_init(bus, pdev);
+	if (ret < 0)
+		return ret;
+
+	/* irq_of_parse_and_map() reports failure by returning 0. */
+	irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+	if (!irq) {
+		dev_err(&pdev->dev, "Could not map interrupt\n");
+		return -EINVAL;
+	}
+
+	ret = devm_request_irq(&pdev->dev, irq, aspeed_i2c_bus_irq,
+			       0, dev_name(&pdev->dev), bus);
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_add_adapter(&bus->adap);
+	if (ret < 0)
+		return ret;
+
+	platform_set_drvdata(pdev, bus);
+
+	dev_info(bus->dev, "i2c bus %d registered, irq %d\n",
+		 bus->adap.nr, irq);
+
+	return 0;
+}
+
+/*
+ * Tear down one bus: with the bus lock held, switch off all controller
+ * functions and interrupt generation, then unregister the adapter.
+ *
+ * Always returns 0.
+ */
+static int aspeed_i2c_remove_bus(struct platform_device *pdev)
+{
+	struct aspeed_i2c_bus *bus = platform_get_drvdata(pdev);
+	void __iomem *regs = bus->base;
+	unsigned long flags;
+
+	/* Disable everything. */
+	spin_lock_irqsave(&bus->lock, flags);
+	writel(0, regs + ASPEED_I2C_FUN_CTRL_REG);
+	writel(0, regs + ASPEED_I2C_INTR_CTRL_REG);
+	spin_unlock_irqrestore(&bus->lock, flags);
+
+	i2c_del_adapter(&bus->adap);
+
+	return 0;
+}
+
+/* Device tree "compatible" strings this driver binds to. */
+static const struct of_device_id aspeed_i2c_bus_of_table[] = {
+	{ .compatible = "aspeed,ast2400-i2c-bus", },
+	{ .compatible = "aspeed,ast2500-i2c-bus", },
+	{ },	/* sentinel */
+};
+MODULE_DEVICE_TABLE(of, aspeed_i2c_bus_of_table);
+
+/* Platform driver glue: one platform device per I2C bus. */
+static struct platform_driver aspeed_i2c_bus_driver = {
+	.probe		= aspeed_i2c_probe_bus,
+	.remove		= aspeed_i2c_remove_bus,
+	.driver		= {
+		.name		= "aspeed-i2c-bus",
+		.of_match_table	= aspeed_i2c_bus_of_table,
+	},
+};
+module_platform_driver(aspeed_i2c_bus_driver);
+
+MODULE_AUTHOR("Brendan Higgins <brendanhiggins@google.com>");
+MODULE_DESCRIPTION("Aspeed I2C Bus Driver");
+MODULE_LICENSE("GPL v2");

diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c
index fabbb9e..38dd61d 100644
--- a/drivers/i2c/busses/i2c-at91.c
+++ b/drivers/i2c/busses/i2c-at91.c

@@ -274,7 +274,7 @@ static void at91_twi_write_next_byte(struct at91_twi_dev *dev)
 		if (!dev->use_alt_cmd)
 			at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
 
-	dev_dbg(dev->dev, "wrote 0x%x, to go %d\n", *dev->buf, dev->buf_len);
+	dev_dbg(dev->dev, "wrote 0x%x, to go %zu\n", *dev->buf, dev->buf_len);
 
 	++dev->buf;
 }
@@ -402,7 +402,7 @@ static void at91_twi_read_next_byte(struct at91_twi_dev *dev)
 			dev->msg->flags &= ~I2C_M_RECV_LEN;
 			dev->buf_len += *dev->buf;
 			dev->msg->len = dev->buf_len + 1;
-			dev_dbg(dev->dev, "received block length %d\n",
+			dev_dbg(dev->dev, "received block length %zu\n",
 					 dev->buf_len);
 		} else {
 			/* abort and send the stop by reading one more byte */
@@ -415,7 +415,7 @@ static void at91_twi_read_next_byte(struct at91_twi_dev *dev)
 	if (!dev->use_alt_cmd && dev->buf_len == 1)
 		at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
 
-	dev_dbg(dev->dev, "read 0x%x, to go %d\n", *dev->buf, dev->buf_len);
+	dev_dbg(dev->dev, "read 0x%x, to go %zu\n", *dev->buf, dev->buf_len);
 
 	++dev->buf;
 }
@@ -622,7 +622,7 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev)
 	 * writing the corresponding bit into the Control Register.
 	 */
 
-	dev_dbg(dev->dev, "transfer: %s %d bytes.\n",
+	dev_dbg(dev->dev, "transfer: %s %zu bytes.\n",
 		(dev->msg->flags & I2C_M_RD) ? "read" : "write", dev->buf_len);
 
 	reinit_completion(&dev->cmd_complete);
@@ -1083,12 +1083,16 @@ static int at91_twi_probe(struct platform_device *pdev)
 		dev_err(dev->dev, "no clock defined\n");
 		return -ENODEV;
 	}
-	clk_prepare_enable(dev->clk);
+	rc = clk_prepare_enable(dev->clk);
+	if (rc)
+		return rc;
 
 	if (dev->dev->of_node) {
 		rc = at91_twi_configure_dma(dev, phy_addr);
-		if (rc == -EPROBE_DEFER)
+		if (rc == -EPROBE_DEFER) {
+			clk_disable_unprepare(dev->clk);
 			return rc;
+		}
 	}
 
 	if (!of_property_read_u32(pdev->dev.of_node, "atmel,fifo-size",

diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c
index 45d6771..75d8016 100644
--- a/drivers/i2c/busses/i2c-cadence.c
+++ b/drivers/i2c/busses/i2c-cadence.c

@@ -405,14 +405,14 @@ static void cdns_i2c_mrecv(struct cdns_i2c *id)
 		cdns_i2c_writereg(id->recv_count, CDNS_I2C_XFER_SIZE_OFFSET);
 	}
 
+	/* Set the slave address in address register - triggers operation */
+	cdns_i2c_writereg(id->p_msg->addr & CDNS_I2C_ADDR_MASK,
+						CDNS_I2C_ADDR_OFFSET);
 	/* Clear the bus hold flag if bytes to receive is less than FIFO size */
 	if (!id->bus_hold_flag &&
 		((id->p_msg->flags & I2C_M_RECV_LEN) != I2C_M_RECV_LEN) &&
 		(id->recv_count <= CDNS_I2C_FIFO_DEPTH))
 			cdns_i2c_clear_bus_hold(id);
-	/* Set the slave address in address register - triggers operation */
-	cdns_i2c_writereg(id->p_msg->addr & CDNS_I2C_ADDR_MASK,
-						CDNS_I2C_ADDR_OFFSET);
 	cdns_i2c_writereg(CDNS_I2C_ENABLED_INTR_MASK, CDNS_I2C_IER_OFFSET);
 }
 

diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c
new file mode 100644
index 0000000..d1a6937
--- /dev/null
+++ b/drivers/i2c/busses/i2c-designware-common.c

@@ -0,0 +1,281 @@
+/*
+ * Synopsys DesignWare I2C adapter driver.
+ *
+ * Based on the TI DAVINCI I2C adapter driver.
+ *
+ * Copyright (C) 2006 Texas Instruments.
+ * Copyright (C) 2007 MontaVista Software Inc.
+ * Copyright (C) 2009 Provigent Ltd.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * ----------------------------------------------------------------------------
+ *
+ */
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
+
+#include "i2c-designware-core.h"
+
+/*
+ * Human-readable descriptions of the TX abort sources, indexed by the bit
+ * number of the corresponding ABRT_* / ARB_LOST code. Bit positions that
+ * have no entry here are left NULL by the designated initializers.
+ *
+ * The table is never modified, so both the array and the strings are const.
+ */
+static const char * const abort_sources[] = {
+	[ABRT_7B_ADDR_NOACK] =
+		"slave address not acknowledged (7bit mode)",
+	[ABRT_10ADDR1_NOACK] =
+		"first address byte not acknowledged (10bit mode)",
+	[ABRT_10ADDR2_NOACK] =
+		"second address byte not acknowledged (10bit mode)",
+	[ABRT_TXDATA_NOACK] =
+		"data not acknowledged",
+	[ABRT_GCALL_NOACK] =
+		"no acknowledgement for a general call",
+	[ABRT_GCALL_READ] =
+		"read after general call",
+	[ABRT_SBYTE_ACKDET] =
+		"start byte acknowledged",
+	[ABRT_SBYTE_NORSTRT] =
+		"trying to send start byte when restart is disabled",
+	[ABRT_10B_RD_NORSTRT] =
+		"trying to read when restart is disabled (10bit mode)",
+	[ABRT_MASTER_DIS] =
+		"trying to use disabled adapter",
+	[ARB_LOST] =
+		"lost arbitration",
+	[ABRT_SLAVE_FLUSH_TXFIFO] =
+		"read command so flush old data in the TX FIFO",
+	[ABRT_SLAVE_ARBLOST] =
+		"slave lost the bus while transmitting data to a remote master",
+	[ABRT_SLAVE_RD_INTX] =
+		"incorrect slave-transmitter mode configuration",
+};
+
+/*
+ * Read a 32-bit controller register at @offset.
+ *
+ * With ACCESS_16BIT set the value is assembled from two 16-bit reads (low
+ * half at @offset, high half at @offset + 2); with ACCESS_SWAP set the
+ * result is byte-swapped before being returned.
+ */
+u32 dw_readl(struct dw_i2c_dev *dev, int offset)
+{
+	u32 value;
+
+	if (dev->flags & ACCESS_16BIT) {
+		/*
+		 * Read the halves in separate statements so the access order
+		 * is fixed (low, then high), and widen the high half before
+		 * shifting: a u16 promotes to signed int, and shifting a set
+		 * bit 15 into the sign bit is undefined.
+		 */
+		value = readw_relaxed(dev->base + offset);
+		value |= (u32)readw_relaxed(dev->base + offset + 2) << 16;
+	} else {
+		value = readl_relaxed(dev->base + offset);
+	}
+
+	if (dev->flags & ACCESS_SWAP)
+		return swab32(value);
+
+	return value;
+}
+
+/*
+ * Write the 32-bit value @b to the controller register at @offset.
+ *
+ * With ACCESS_SWAP set the value is byte-swapped first; with ACCESS_16BIT
+ * set it is written as two 16-bit halves (low half at @offset, high half at
+ * @offset + 2).
+ */
+void dw_writel(struct dw_i2c_dev *dev, u32 b, int offset)
+{
+	u32 val = (dev->flags & ACCESS_SWAP) ? swab32(b) : b;
+
+	if (!(dev->flags & ACCESS_16BIT)) {
+		writel_relaxed(val, dev->base + offset);
+		return;
+	}
+
+	writew_relaxed((u16)val, dev->base + offset);
+	writew_relaxed((u16)(val >> 16), dev->base + offset + 2);
+}
+
+/*
+ * Compute the value for an IC_[FS]S_SCL_HCNT register.
+ *
+ * @ic_clk and the timing arguments are scaled so that their product divided
+ * by 1000000 (rounded to nearest) is a count of IC_CLK cycles; @offset is
+ * added to the result. NOTE(review): presumably @ic_clk is in kHz and the
+ * times in ns -- confirm against the callers.
+ *
+ * DesignWare I2C core doesn't seem to have solid strategy to meet the
+ * tHD;STA timing spec.  Configuring _HCNT based on tHIGH spec will result in
+ * violation of the tHD;STA spec, hence the two alternatives selected by
+ * @cond.
+ */
+u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset)
+{
+	if (!cond) {
+		/*
+		 * Conditional expression:
+		 *
+		 *   IC_[FS]S_SCL_HCNT + 3 >= IC_CLK * (tHD;STA + tf)
+		 *
+		 * This is just an experimental rule; the tHD;STA period turned
+		 * out to be proportional to (_HCNT + 3).  With this setting,
+		 * we could meet both tHIGH and tHD;STA timing specs.
+		 *
+		 * If unsure, you'd better take this alternative.
+		 *
+		 * The reason why we need to take into account "tf" here,
+		 * is the same as described in i2c_dw_scl_lcnt().
+		 */
+		return (ic_clk * (tSYMBOL + tf) + 500000) / 1000000
+			- 3 + offset;
+	}
+
+	/*
+	 * Conditional expression:
+	 *
+	 *   IC_[FS]S_SCL_HCNT + (1+4+3) >= IC_CLK * tHIGH
+	 *
+	 * This is based on the DW manuals, and represents an ideal
+	 * configuration.  The resulting I2C bus speed will be faster than
+	 * any of the others.
+	 *
+	 * If your hardware is free from tHD;STA issue, try this one.
+	 */
+	return (ic_clk * tSYMBOL + 500000) / 1000000 - 8 + offset;
+}
+
+/*
+ * Compute the value for an IC_[FS]S_SCL_LCNT register; @offset is added to
+ * the computed count.
+ */
+u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset)
+{
+	u32 cycles;
+
+	/*
+	 * Conditional expression:
+	 *
+	 *   IC_[FS]S_SCL_LCNT + 1 >= IC_CLK * (tLOW + tf)
+	 *
+	 * DW I2C core starts counting the SCL CNTs for the LOW period
+	 * of the SCL clock (tLOW) as soon as it pulls the SCL line.
+	 * In order to meet the tLOW timing spec, we need to take into
+	 * account the fall time of SCL signal (tf).  Default tf value
+	 * should be 0.3 us, for safety.
+	 */
+	cycles = (ic_clk * (tLOW + tf) + 500000) / 1000000;
+
+	return cycles - 1 + offset;
+}
+
+/*
+ * Write @enable to DW_IC_ENABLE without checking that the controller
+ * actually reached the requested state.
+ */
+void __i2c_dw_enable(struct dw_i2c_dev *dev, bool enable)
+{
+	dw_writel(dev, enable, DW_IC_ENABLE);
+}
+
+/*
+ * Request the enable state @enable and poll DW_IC_ENABLE_STATUS until the
+ * controller reports it. The request is re-issued on every poll; a warning
+ * is logged if the state is not reached after all attempts.
+ */
+void __i2c_dw_enable_and_wait(struct dw_i2c_dev *dev, bool enable)
+{
+	int attempt;
+
+	for (attempt = 0; attempt <= 100; attempt++) {
+		__i2c_dw_enable(dev, enable);
+		if ((dw_readl(dev, DW_IC_ENABLE_STATUS) & 1) == enable)
+			return;
+
+		/*
+		 * Wait 10 times the signaling period of the highest I2C
+		 * transfer supported by the driver (for 400KHz this is
+		 * 25us) as described in the DesignWare I2C databook.
+		 */
+		usleep_range(25, 250);
+	}
+
+	dev_warn(dev->dev, "timeout in %sabling adapter\n",
+		 enable ? "en" : "dis");
+}
+
+/*
+ * Return the rate reported by the get_clk_rate_khz() callback, or 0 (with a
+ * one-time warning) when no callback is installed.
+ */
+unsigned long i2c_dw_clk_rate(struct dw_i2c_dev *dev)
+{
+	/*
+	 * Clock is not necessary if we got LCNT/HCNT values directly from
+	 * the platform code.
+	 */
+	if (!WARN_ON_ONCE(!dev->get_clk_rate_khz))
+		return dev->get_clk_rate_khz(dev);
+
+	return 0;
+}
+
+/*
+ * Take bus ownership through the optional acquire_lock() callback.
+ *
+ * Returns 0 when there is no callback or it succeeds; otherwise logs an
+ * error and returns the callback's error code.
+ */
+int i2c_dw_acquire_lock(struct dw_i2c_dev *dev)
+{
+	int err;
+
+	if (!dev->acquire_lock)
+		return 0;
+
+	err = dev->acquire_lock(dev);
+	if (err)
+		dev_err(dev->dev, "couldn't acquire bus ownership\n");
+
+	return err;
+}
+
+/* Give bus ownership back through the optional release_lock() callback. */
+void i2c_dw_release_lock(struct dw_i2c_dev *dev)
+{
+	if (!dev->release_lock)
+		return;
+
+	dev->release_lock(dev);
+}
+
+/*
+ * Wait for the bus to become idle.
+ *
+ * Polls the activity bit of DW_IC_STATUS roughly once per millisecond, up
+ * to TIMEOUT times. Returns 0 once the bit is clear, or -ETIMEDOUT (after
+ * logging a warning) if it is still set when the polls run out.
+ */
+int i2c_dw_wait_bus_not_busy(struct dw_i2c_dev *dev)
+{
+	int remaining;
+
+	for (remaining = TIMEOUT;
+	     dw_readl(dev, DW_IC_STATUS) & DW_IC_STATUS_ACTIVITY;
+	     remaining--) {
+		if (remaining <= 0) {
+			dev_warn(dev->dev, "timeout waiting for bus ready\n");
+			return -ETIMEDOUT;
+		}
+		usleep_range(1000, 1100);
+	}
+
+	return 0;
+}
+
+/*
+ * Log the recorded TX abort sources and translate them into an errno.
+ *
+ * Any NOACK source is logged at debug level and yields -EREMOTEIO. All
+ * other sources are logged as errors and yield -EAGAIN for lost
+ * arbitration, -EINVAL for a read after general call, and -EIO otherwise.
+ */
+int i2c_dw_handle_tx_abort(struct dw_i2c_dev *dev)
+{
+	unsigned long abort_source = dev->abort_source;
+	int bit;
+
+	if (!(abort_source & DW_IC_TX_ABRT_NOACK)) {
+		for_each_set_bit(bit, &abort_source, ARRAY_SIZE(abort_sources))
+			dev_err(dev->dev, "%s: %s\n", __func__,
+				abort_sources[bit]);
+
+		if (abort_source & DW_IC_TX_ARB_LOST)
+			return -EAGAIN;
+		if (abort_source & DW_IC_TX_ABRT_GCALL_READ)
+			return -EINVAL; /* wrong msgs[] data */
+
+		return -EIO;
+	}
+
+	for_each_set_bit(bit, &abort_source, ARRAY_SIZE(abort_sources))
+		dev_dbg(dev->dev,
+			"%s: %s\n", __func__, abort_sources[bit]);
+
+	return -EREMOTEIO;
+}
+
+/* Return the functionality flags stored in the adapter's private data. */
+u32 i2c_dw_func(struct i2c_adapter *adap)
+{
+	struct dw_i2c_dev *priv;
+
+	priv = i2c_get_adapdata(adap);
+
+	return priv->functionality;
+}
+
+/*
+ * Disable the controller (waiting for it to report the disabled state) and
+ * then mask all of its interrupts.
+ */
+void i2c_dw_disable(struct dw_i2c_dev *dev)
+{
+	/* Disable controller */
+	__i2c_dw_enable_and_wait(dev, false);
+
+	/*
+	 * Disable all interrupts.
+	 * NOTE(review): the DW_IC_CLR_INTR read presumably clears anything
+	 * still pending (read-to-clear) -- confirm against the databook.
+	 */
+	dw_writel(dev, 0, DW_IC_INTR_MASK);
+	dw_readl(dev, DW_IC_CLR_INTR);
+}
+
+/* Mask all controller interrupts by writing 0 to DW_IC_INTR_MASK. */
+void i2c_dw_disable_int(struct dw_i2c_dev *dev)
+{
+	dw_writel(dev, 0, DW_IC_INTR_MASK);
+}
+
+/* Return the raw contents of the DW_IC_COMP_PARAM_1 register. */
+u32 i2c_dw_read_comp_param(struct dw_i2c_dev *dev)
+{
+	return dw_readl(dev, DW_IC_COMP_PARAM_1);
+}
+EXPORT_SYMBOL_GPL(i2c_dw_read_comp_param);
+
+MODULE_DESCRIPTION("Synopsys DesignWare I2C bus adapter core");
+MODULE_LICENSE("GPL");

diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c
deleted file mode 100644
index c453717..0000000
--- a/drivers/i2c/busses/i2c-designware-core.c
+++ /dev/null

@@ -1,1012 +0,0 @@
-/*
- * Synopsys DesignWare I2C adapter driver (master only).
- *
- * Based on the TI DAVINCI I2C adapter driver.
- *
- * Copyright (C) 2006 Texas Instruments.
- * Copyright (C) 2007 MontaVista Software Inc.
- * Copyright (C) 2009 Provigent Ltd.
- *
- * ----------------------------------------------------------------------------
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * ----------------------------------------------------------------------------
- *
- */
-#include <linux/export.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/i2c.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/pm_runtime.h>
-#include <linux/delay.h>
-#include <linux/module.h>
-#include "i2c-designware-core.h"
-
-/*
- * Registers offset
- */
-#define DW_IC_CON		0x0
-#define DW_IC_TAR		0x4
-#define DW_IC_DATA_CMD		0x10
-#define DW_IC_SS_SCL_HCNT	0x14
-#define DW_IC_SS_SCL_LCNT	0x18
-#define DW_IC_FS_SCL_HCNT	0x1c
-#define DW_IC_FS_SCL_LCNT	0x20
-#define DW_IC_HS_SCL_HCNT	0x24
-#define DW_IC_HS_SCL_LCNT	0x28
-#define DW_IC_INTR_STAT		0x2c
-#define DW_IC_INTR_MASK		0x30
-#define DW_IC_RAW_INTR_STAT	0x34
-#define DW_IC_RX_TL		0x38
-#define DW_IC_TX_TL		0x3c
-#define DW_IC_CLR_INTR		0x40
-#define DW_IC_CLR_RX_UNDER	0x44
-#define DW_IC_CLR_RX_OVER	0x48
-#define DW_IC_CLR_TX_OVER	0x4c
-#define DW_IC_CLR_RD_REQ	0x50
-#define DW_IC_CLR_TX_ABRT	0x54
-#define DW_IC_CLR_RX_DONE	0x58
-#define DW_IC_CLR_ACTIVITY	0x5c
-#define DW_IC_CLR_STOP_DET	0x60
-#define DW_IC_CLR_START_DET	0x64
-#define DW_IC_CLR_GEN_CALL	0x68
-#define DW_IC_ENABLE		0x6c
-#define DW_IC_STATUS		0x70
-#define DW_IC_TXFLR		0x74
-#define DW_IC_RXFLR		0x78
-#define DW_IC_SDA_HOLD		0x7c
-#define DW_IC_TX_ABRT_SOURCE	0x80
-#define DW_IC_ENABLE_STATUS	0x9c
-#define DW_IC_COMP_PARAM_1	0xf4
-#define DW_IC_COMP_VERSION	0xf8
-#define DW_IC_SDA_HOLD_MIN_VERS	0x3131312A
-#define DW_IC_COMP_TYPE		0xfc
-#define DW_IC_COMP_TYPE_VALUE	0x44570140
-
-#define DW_IC_INTR_RX_UNDER	0x001
-#define DW_IC_INTR_RX_OVER	0x002
-#define DW_IC_INTR_RX_FULL	0x004
-#define DW_IC_INTR_TX_OVER	0x008
-#define DW_IC_INTR_TX_EMPTY	0x010
-#define DW_IC_INTR_RD_REQ	0x020
-#define DW_IC_INTR_TX_ABRT	0x040
-#define DW_IC_INTR_RX_DONE	0x080
-#define DW_IC_INTR_ACTIVITY	0x100
-#define DW_IC_INTR_STOP_DET	0x200
-#define DW_IC_INTR_START_DET	0x400
-#define DW_IC_INTR_GEN_CALL	0x800
-
-#define DW_IC_INTR_DEFAULT_MASK		(DW_IC_INTR_RX_FULL | \
-					 DW_IC_INTR_TX_EMPTY | \
-					 DW_IC_INTR_TX_ABRT | \
-					 DW_IC_INTR_STOP_DET)
-
-#define DW_IC_STATUS_ACTIVITY	0x1
-
-#define DW_IC_SDA_HOLD_RX_SHIFT		16
-#define DW_IC_SDA_HOLD_RX_MASK		GENMASK(23, DW_IC_SDA_HOLD_RX_SHIFT)
-
-#define DW_IC_ERR_TX_ABRT	0x1
-
-#define DW_IC_TAR_10BITADDR_MASTER BIT(12)
-
-#define DW_IC_COMP_PARAM_1_SPEED_MODE_HIGH	(BIT(2) | BIT(3))
-#define DW_IC_COMP_PARAM_1_SPEED_MODE_MASK	GENMASK(3, 2)
-
-/*
- * status codes
- */
-#define STATUS_IDLE			0x0
-#define STATUS_WRITE_IN_PROGRESS	0x1
-#define STATUS_READ_IN_PROGRESS		0x2
-
-#define TIMEOUT			20 /* ms */
-
-/*
- * hardware abort codes from the DW_IC_TX_ABRT_SOURCE register
- *
- * only expected abort codes are listed here
- * refer to the datasheet for the full list
- */
-#define ABRT_7B_ADDR_NOACK	0
-#define ABRT_10ADDR1_NOACK	1
-#define ABRT_10ADDR2_NOACK	2
-#define ABRT_TXDATA_NOACK	3
-#define ABRT_GCALL_NOACK	4
-#define ABRT_GCALL_READ		5
-#define ABRT_SBYTE_ACKDET	7
-#define ABRT_SBYTE_NORSTRT	9
-#define ABRT_10B_RD_NORSTRT	10
-#define ABRT_MASTER_DIS		11
-#define ARB_LOST		12
-
-#define DW_IC_TX_ABRT_7B_ADDR_NOACK	(1UL << ABRT_7B_ADDR_NOACK)
-#define DW_IC_TX_ABRT_10ADDR1_NOACK	(1UL << ABRT_10ADDR1_NOACK)
-#define DW_IC_TX_ABRT_10ADDR2_NOACK	(1UL << ABRT_10ADDR2_NOACK)
-#define DW_IC_TX_ABRT_TXDATA_NOACK	(1UL << ABRT_TXDATA_NOACK)
-#define DW_IC_TX_ABRT_GCALL_NOACK	(1UL << ABRT_GCALL_NOACK)
-#define DW_IC_TX_ABRT_GCALL_READ	(1UL << ABRT_GCALL_READ)
-#define DW_IC_TX_ABRT_SBYTE_ACKDET	(1UL << ABRT_SBYTE_ACKDET)
-#define DW_IC_TX_ABRT_SBYTE_NORSTRT	(1UL << ABRT_SBYTE_NORSTRT)
-#define DW_IC_TX_ABRT_10B_RD_NORSTRT	(1UL << ABRT_10B_RD_NORSTRT)
-#define DW_IC_TX_ABRT_MASTER_DIS	(1UL << ABRT_MASTER_DIS)
-#define DW_IC_TX_ARB_LOST		(1UL << ARB_LOST)
-
-#define DW_IC_TX_ABRT_NOACK		(DW_IC_TX_ABRT_7B_ADDR_NOACK | \
-					 DW_IC_TX_ABRT_10ADDR1_NOACK | \
-					 DW_IC_TX_ABRT_10ADDR2_NOACK | \
-					 DW_IC_TX_ABRT_TXDATA_NOACK | \
-					 DW_IC_TX_ABRT_GCALL_NOACK)
-
-static char *abort_sources[] = {
-	[ABRT_7B_ADDR_NOACK] =
-		"slave address not acknowledged (7bit mode)",
-	[ABRT_10ADDR1_NOACK] =
-		"first address byte not acknowledged (10bit mode)",
-	[ABRT_10ADDR2_NOACK] =
-		"second address byte not acknowledged (10bit mode)",
-	[ABRT_TXDATA_NOACK] =
-		"data not acknowledged",
-	[ABRT_GCALL_NOACK] =
-		"no acknowledgement for a general call",
-	[ABRT_GCALL_READ] =
-		"read after general call",
-	[ABRT_SBYTE_ACKDET] =
-		"start byte acknowledged",
-	[ABRT_SBYTE_NORSTRT] =
-		"trying to send start byte when restart is disabled",
-	[ABRT_10B_RD_NORSTRT] =
-		"trying to read when restart is disabled (10bit mode)",
-	[ABRT_MASTER_DIS] =
-		"trying to use disabled adapter",
-	[ARB_LOST] =
-		"lost arbitration",
-};
-
-static u32 dw_readl(struct dw_i2c_dev *dev, int offset)
-{
-	u32 value;
-
-	if (dev->flags & ACCESS_16BIT)
-		value = readw_relaxed(dev->base + offset) |
-			(readw_relaxed(dev->base + offset + 2) << 16);
-	else
-		value = readl_relaxed(dev->base + offset);
-
-	if (dev->flags & ACCESS_SWAP)
-		return swab32(value);
-	else
-		return value;
-}
-
-static void dw_writel(struct dw_i2c_dev *dev, u32 b, int offset)
-{
-	if (dev->flags & ACCESS_SWAP)
-		b = swab32(b);
-
-	if (dev->flags & ACCESS_16BIT) {
-		writew_relaxed((u16)b, dev->base + offset);
-		writew_relaxed((u16)(b >> 16), dev->base + offset + 2);
-	} else {
-		writel_relaxed(b, dev->base + offset);
-	}
-}
-
-static u32
-i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset)
-{
-	/*
-	 * DesignWare I2C core doesn't seem to have solid strategy to meet
-	 * the tHD;STA timing spec.  Configuring _HCNT based on tHIGH spec
-	 * will result in violation of the tHD;STA spec.
-	 */
-	if (cond)
-		/*
-		 * Conditional expression:
-		 *
-		 *   IC_[FS]S_SCL_HCNT + (1+4+3) >= IC_CLK * tHIGH
-		 *
-		 * This is based on the DW manuals, and represents an ideal
-		 * configuration.  The resulting I2C bus speed will be
-		 * faster than any of the others.
-		 *
-		 * If your hardware is free from tHD;STA issue, try this one.
-		 */
-		return (ic_clk * tSYMBOL + 500000) / 1000000 - 8 + offset;
-	else
-		/*
-		 * Conditional expression:
-		 *
-		 *   IC_[FS]S_SCL_HCNT + 3 >= IC_CLK * (tHD;STA + tf)
-		 *
-		 * This is just experimental rule; the tHD;STA period turned
-		 * out to be proportinal to (_HCNT + 3).  With this setting,
-		 * we could meet both tHIGH and tHD;STA timing specs.
-		 *
-		 * If unsure, you'd better to take this alternative.
-		 *
-		 * The reason why we need to take into account "tf" here,
-		 * is the same as described in i2c_dw_scl_lcnt().
-		 */
-		return (ic_clk * (tSYMBOL + tf) + 500000) / 1000000
-			- 3 + offset;
-}
-
-static u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset)
-{
-	/*
-	 * Conditional expression:
-	 *
-	 *   IC_[FS]S_SCL_LCNT + 1 >= IC_CLK * (tLOW + tf)
-	 *
-	 * DW I2C core starts counting the SCL CNTs for the LOW period
-	 * of the SCL clock (tLOW) as soon as it pulls the SCL line.
-	 * In order to meet the tLOW timing spec, we need to take into
-	 * account the fall time of SCL signal (tf).  Default tf value
-	 * should be 0.3 us, for safety.
-	 */
-	return ((ic_clk * (tLOW + tf) + 500000) / 1000000) - 1 + offset;
-}
-
-static void __i2c_dw_enable(struct dw_i2c_dev *dev, bool enable)
-{
-	dw_writel(dev, enable, DW_IC_ENABLE);
-}
-
-static void __i2c_dw_enable_and_wait(struct dw_i2c_dev *dev, bool enable)
-{
-	int timeout = 100;
-
-	do {
-		__i2c_dw_enable(dev, enable);
-		if ((dw_readl(dev, DW_IC_ENABLE_STATUS) & 1) == enable)
-			return;
-
-		/*
-		 * Wait 10 times the signaling period of the highest I2C
-		 * transfer supported by the driver (for 400KHz this is
-		 * 25us) as described in the DesignWare I2C databook.
-		 */
-		usleep_range(25, 250);
-	} while (timeout--);
-
-	dev_warn(dev->dev, "timeout in %sabling adapter\n",
-		 enable ? "en" : "dis");
-}
-
-static unsigned long i2c_dw_clk_rate(struct dw_i2c_dev *dev)
-{
-	/*
-	 * Clock is not necessary if we got LCNT/HCNT values directly from
-	 * the platform code.
-	 */
-	if (WARN_ON_ONCE(!dev->get_clk_rate_khz))
-		return 0;
-	return dev->get_clk_rate_khz(dev);
-}
-
-static int i2c_dw_acquire_lock(struct dw_i2c_dev *dev)
-{
-	int ret;
-
-	if (!dev->acquire_lock)
-		return 0;
-
-	ret = dev->acquire_lock(dev);
-	if (!ret)
-		return 0;
-
-	dev_err(dev->dev, "couldn't acquire bus ownership\n");
-
-	return ret;
-}
-
-static void i2c_dw_release_lock(struct dw_i2c_dev *dev)
-{
-	if (dev->release_lock)
-		dev->release_lock(dev);
-}
-
-/**
- * i2c_dw_init() - initialize the designware i2c master hardware
- * @dev: device private data
- *
- * This functions configures and enables the I2C master.
- * This function is called during I2C init function, and in case of timeout at
- * run time.
- */
-int i2c_dw_init(struct dw_i2c_dev *dev)
-{
-	u32 hcnt, lcnt;
-	u32 reg, comp_param1;
-	u32 sda_falling_time, scl_falling_time;
-	int ret;
-
-	ret = i2c_dw_acquire_lock(dev);
-	if (ret)
-		return ret;
-
-	reg = dw_readl(dev, DW_IC_COMP_TYPE);
-	if (reg == ___constant_swab32(DW_IC_COMP_TYPE_VALUE)) {
-		/* Configure register endianess access */
-		dev->flags |= ACCESS_SWAP;
-	} else if (reg == (DW_IC_COMP_TYPE_VALUE & 0x0000ffff)) {
-		/* Configure register access mode 16bit */
-		dev->flags |= ACCESS_16BIT;
-	} else if (reg != DW_IC_COMP_TYPE_VALUE) {
-		dev_err(dev->dev, "Unknown Synopsys component type: "
-			"0x%08x\n", reg);
-		i2c_dw_release_lock(dev);
-		return -ENODEV;
-	}
-
-	comp_param1 = dw_readl(dev, DW_IC_COMP_PARAM_1);
-
-	/* Disable the adapter */
-	__i2c_dw_enable_and_wait(dev, false);
-
-	/* set standard and fast speed deviders for high/low periods */
-
-	sda_falling_time = dev->sda_falling_time ?: 300; /* ns */
-	scl_falling_time = dev->scl_falling_time ?: 300; /* ns */
-
-	/* Set SCL timing parameters for standard-mode */
-	if (dev->ss_hcnt && dev->ss_lcnt) {
-		hcnt = dev->ss_hcnt;
-		lcnt = dev->ss_lcnt;
-	} else {
-		hcnt = i2c_dw_scl_hcnt(i2c_dw_clk_rate(dev),
-					4000,	/* tHD;STA = tHIGH = 4.0 us */
-					sda_falling_time,
-					0,	/* 0: DW default, 1: Ideal */
-					0);	/* No offset */
-		lcnt = i2c_dw_scl_lcnt(i2c_dw_clk_rate(dev),
-					4700,	/* tLOW = 4.7 us */
-					scl_falling_time,
-					0);	/* No offset */
-	}
-	dw_writel(dev, hcnt, DW_IC_SS_SCL_HCNT);
-	dw_writel(dev, lcnt, DW_IC_SS_SCL_LCNT);
-	dev_dbg(dev->dev, "Standard-mode HCNT:LCNT = %d:%d\n", hcnt, lcnt);
-
-	/* Set SCL timing parameters for fast-mode or fast-mode plus */
-	if ((dev->clk_freq == 1000000) && dev->fp_hcnt && dev->fp_lcnt) {
-		hcnt = dev->fp_hcnt;
-		lcnt = dev->fp_lcnt;
-	} else if (dev->fs_hcnt && dev->fs_lcnt) {
-		hcnt = dev->fs_hcnt;
-		lcnt = dev->fs_lcnt;
-	} else {
-		hcnt = i2c_dw_scl_hcnt(i2c_dw_clk_rate(dev),
-					600,	/* tHD;STA = tHIGH = 0.6 us */
-					sda_falling_time,
-					0,	/* 0: DW default, 1: Ideal */
-					0);	/* No offset */
-		lcnt = i2c_dw_scl_lcnt(i2c_dw_clk_rate(dev),
-					1300,	/* tLOW = 1.3 us */
-					scl_falling_time,
-					0);	/* No offset */
-	}
-	dw_writel(dev, hcnt, DW_IC_FS_SCL_HCNT);
-	dw_writel(dev, lcnt, DW_IC_FS_SCL_LCNT);
-	dev_dbg(dev->dev, "Fast-mode HCNT:LCNT = %d:%d\n", hcnt, lcnt);
-
-	if ((dev->master_cfg & DW_IC_CON_SPEED_MASK) ==
-		DW_IC_CON_SPEED_HIGH) {
-		if ((comp_param1 & DW_IC_COMP_PARAM_1_SPEED_MODE_MASK)
-			!= DW_IC_COMP_PARAM_1_SPEED_MODE_HIGH) {
-			dev_err(dev->dev, "High Speed not supported!\n");
-			dev->master_cfg &= ~DW_IC_CON_SPEED_MASK;
-			dev->master_cfg |= DW_IC_CON_SPEED_FAST;
-		} else if (dev->hs_hcnt && dev->hs_lcnt) {
-			hcnt = dev->hs_hcnt;
-			lcnt = dev->hs_lcnt;
-			dw_writel(dev, hcnt, DW_IC_HS_SCL_HCNT);
-			dw_writel(dev, lcnt, DW_IC_HS_SCL_LCNT);
-			dev_dbg(dev->dev, "HighSpeed-mode HCNT:LCNT = %d:%d\n",
-				hcnt, lcnt);
-		}
-	}
-
-	/* Configure SDA Hold Time if required */
-	reg = dw_readl(dev, DW_IC_COMP_VERSION);
-	if (reg >= DW_IC_SDA_HOLD_MIN_VERS) {
-		if (!dev->sda_hold_time) {
-			/* Keep previous hold time setting if no one set it */
-			dev->sda_hold_time = dw_readl(dev, DW_IC_SDA_HOLD);
-		}
-		/*
-		 * Workaround for avoiding TX arbitration lost in case I2C
-		 * slave pulls SDA down "too quickly" after falling egde of
-		 * SCL by enabling non-zero SDA RX hold. Specification says it
-		 * extends incoming SDA low to high transition while SCL is
-		 * high but it apprears to help also above issue.
-		 */
-		if (!(dev->sda_hold_time & DW_IC_SDA_HOLD_RX_MASK))
-			dev->sda_hold_time |= 1 << DW_IC_SDA_HOLD_RX_SHIFT;
-		dw_writel(dev, dev->sda_hold_time, DW_IC_SDA_HOLD);
-	} else {
-		dev_warn(dev->dev,
-			"Hardware too old to adjust SDA hold time.\n");
-	}
-
-	/* Configure Tx/Rx FIFO threshold levels */
-	dw_writel(dev, dev->tx_fifo_depth / 2, DW_IC_TX_TL);
-	dw_writel(dev, 0, DW_IC_RX_TL);
-
-	/* configure the i2c master */
-	dw_writel(dev, dev->master_cfg , DW_IC_CON);
-
-	i2c_dw_release_lock(dev);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(i2c_dw_init);
-
-/*
- * Waiting for bus not busy
- */
-static int i2c_dw_wait_bus_not_busy(struct dw_i2c_dev *dev)
-{
-	int timeout = TIMEOUT;
-
-	while (dw_readl(dev, DW_IC_STATUS) & DW_IC_STATUS_ACTIVITY) {
-		if (timeout <= 0) {
-			dev_warn(dev->dev, "timeout waiting for bus ready\n");
-			return -ETIMEDOUT;
-		}
-		timeout--;
-		usleep_range(1000, 1100);
-	}
-
-	return 0;
-}
-
-static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
-{
-	struct i2c_msg *msgs = dev->msgs;
-	u32 ic_con, ic_tar = 0;
-
-	/* Disable the adapter */
-	__i2c_dw_enable_and_wait(dev, false);
-
-	/* if the slave address is ten bit address, enable 10BITADDR */
-	ic_con = dw_readl(dev, DW_IC_CON);
-	if (msgs[dev->msg_write_idx].flags & I2C_M_TEN) {
-		ic_con |= DW_IC_CON_10BITADDR_MASTER;
-		/*
-		 * If I2C_DYNAMIC_TAR_UPDATE is set, the 10-bit addressing
-		 * mode has to be enabled via bit 12 of IC_TAR register.
-		 * We set it always as I2C_DYNAMIC_TAR_UPDATE can't be
-		 * detected from registers.
-		 */
-		ic_tar = DW_IC_TAR_10BITADDR_MASTER;
-	} else {
-		ic_con &= ~DW_IC_CON_10BITADDR_MASTER;
-	}
-
-	dw_writel(dev, ic_con, DW_IC_CON);
-
-	/*
-	 * Set the slave (target) address and enable 10-bit addressing mode
-	 * if applicable.
-	 */
-	dw_writel(dev, msgs[dev->msg_write_idx].addr | ic_tar, DW_IC_TAR);
-
-	/* enforce disabled interrupts (due to HW issues) */
-	i2c_dw_disable_int(dev);
-
-	/* Enable the adapter */
-	__i2c_dw_enable(dev, true);
-
-	/* Clear and enable interrupts */
-	dw_readl(dev, DW_IC_CLR_INTR);
-	dw_writel(dev, DW_IC_INTR_DEFAULT_MASK, DW_IC_INTR_MASK);
-}
-
-/*
- * Initiate (and continue) low level master read/write transaction.
- * This function is only called from i2c_dw_isr, and pumping i2c_msg
- * messages into the tx buffer.  Even if the size of i2c_msg data is
- * longer than the size of the tx buffer, it handles everything.
- */
-static void
-i2c_dw_xfer_msg(struct dw_i2c_dev *dev)
-{
-	struct i2c_msg *msgs = dev->msgs;
-	u32 intr_mask;
-	int tx_limit, rx_limit;
-	u32 addr = msgs[dev->msg_write_idx].addr;
-	u32 buf_len = dev->tx_buf_len;
-	u8 *buf = dev->tx_buf;
-	bool need_restart = false;
-
-	intr_mask = DW_IC_INTR_DEFAULT_MASK;
-
-	for (; dev->msg_write_idx < dev->msgs_num; dev->msg_write_idx++) {
-		u32 flags = msgs[dev->msg_write_idx].flags;
-
-		/*
-		 * if target address has changed, we need to
-		 * reprogram the target address in the i2c
-		 * adapter when we are done with this transfer
-		 */
-		if (msgs[dev->msg_write_idx].addr != addr) {
-			dev_err(dev->dev,
-				"%s: invalid target address\n", __func__);
-			dev->msg_err = -EINVAL;
-			break;
-		}
-
-		if (msgs[dev->msg_write_idx].len == 0) {
-			dev_err(dev->dev,
-				"%s: invalid message length\n", __func__);
-			dev->msg_err = -EINVAL;
-			break;
-		}
-
-		if (!(dev->status & STATUS_WRITE_IN_PROGRESS)) {
-			/* new i2c_msg */
-			buf = msgs[dev->msg_write_idx].buf;
-			buf_len = msgs[dev->msg_write_idx].len;
-
-			/* If both IC_EMPTYFIFO_HOLD_MASTER_EN and
-			 * IC_RESTART_EN are set, we must manually
-			 * set restart bit between messages.
-			 */
-			if ((dev->master_cfg & DW_IC_CON_RESTART_EN) &&
-					(dev->msg_write_idx > 0))
-				need_restart = true;
-		}
-
-		tx_limit = dev->tx_fifo_depth - dw_readl(dev, DW_IC_TXFLR);
-		rx_limit = dev->rx_fifo_depth - dw_readl(dev, DW_IC_RXFLR);
-
-		while (buf_len > 0 && tx_limit > 0 && rx_limit > 0) {
-			u32 cmd = 0;
-
-			/*
-			 * If IC_EMPTYFIFO_HOLD_MASTER_EN is set we must
-			 * manually set the stop bit. However, it cannot be
-			 * detected from the registers so we set it always
-			 * when writing/reading the last byte.
-			 */
-
-			/*
-			 * i2c-core.c always sets the buffer length of
-			 * I2C_FUNC_SMBUS_BLOCK_DATA to 1. The length will
-			 * be adjusted when receiving the first byte.
-			 * Thus we can't stop the transaction here.
-			 */
-			if (dev->msg_write_idx == dev->msgs_num - 1 &&
-			    buf_len == 1 && !(flags & I2C_M_RECV_LEN))
-				cmd |= BIT(9);
-
-			if (need_restart) {
-				cmd |= BIT(10);
-				need_restart = false;
-			}
-
-			if (msgs[dev->msg_write_idx].flags & I2C_M_RD) {
-
-				/* avoid rx buffer overrun */
-				if (dev->rx_outstanding >= dev->rx_fifo_depth)
-					break;
-
-				dw_writel(dev, cmd | 0x100, DW_IC_DATA_CMD);
-				rx_limit--;
-				dev->rx_outstanding++;
-			} else
-				dw_writel(dev, cmd | *buf++, DW_IC_DATA_CMD);
-			tx_limit--; buf_len--;
-		}
-
-		dev->tx_buf = buf;
-		dev->tx_buf_len = buf_len;
-
-		/*
-		 * Because we don't know the buffer length in the
-		 * I2C_FUNC_SMBUS_BLOCK_DATA case, we can't stop
-		 * the transaction here.
-		 */
-		if (buf_len > 0 || flags & I2C_M_RECV_LEN) {
-			/* more bytes to be written */
-			dev->status |= STATUS_WRITE_IN_PROGRESS;
-			break;
-		} else
-			dev->status &= ~STATUS_WRITE_IN_PROGRESS;
-	}
-
-	/*
-	 * If i2c_msg index search is completed, we don't need TX_EMPTY
-	 * interrupt any more.
-	 */
-	if (dev->msg_write_idx == dev->msgs_num)
-		intr_mask &= ~DW_IC_INTR_TX_EMPTY;
-
-	if (dev->msg_err)
-		intr_mask = 0;
-
-	dw_writel(dev, intr_mask,  DW_IC_INTR_MASK);
-}
-
-static u8
-i2c_dw_recv_len(struct dw_i2c_dev *dev, u8 len)
-{
-	struct i2c_msg *msgs = dev->msgs;
-	u32 flags = msgs[dev->msg_read_idx].flags;
-
-	/*
-	 * Adjust the buffer length and mask the flag
-	 * after receiving the first byte.
-	 */
-	len += (flags & I2C_CLIENT_PEC) ? 2 : 1;
-	dev->tx_buf_len = len - min_t(u8, len, dev->rx_outstanding);
-	msgs[dev->msg_read_idx].len = len;
-	msgs[dev->msg_read_idx].flags &= ~I2C_M_RECV_LEN;
-
-	return len;
-}
-
-static void
-i2c_dw_read(struct dw_i2c_dev *dev)
-{
-	struct i2c_msg *msgs = dev->msgs;
-	int rx_valid;
-
-	for (; dev->msg_read_idx < dev->msgs_num; dev->msg_read_idx++) {
-		u32 len;
-		u8 *buf;
-
-		if (!(msgs[dev->msg_read_idx].flags & I2C_M_RD))
-			continue;
-
-		if (!(dev->status & STATUS_READ_IN_PROGRESS)) {
-			len = msgs[dev->msg_read_idx].len;
-			buf = msgs[dev->msg_read_idx].buf;
-		} else {
-			len = dev->rx_buf_len;
-			buf = dev->rx_buf;
-		}
-
-		rx_valid = dw_readl(dev, DW_IC_RXFLR);
-
-		for (; len > 0 && rx_valid > 0; len--, rx_valid--) {
-			u32 flags = msgs[dev->msg_read_idx].flags;
-
-			*buf = dw_readl(dev, DW_IC_DATA_CMD);
-			/* Ensure length byte is a valid value */
-			if (flags & I2C_M_RECV_LEN &&
-				*buf <= I2C_SMBUS_BLOCK_MAX && *buf > 0) {
-				len = i2c_dw_recv_len(dev, *buf);
-			}
-			buf++;
-			dev->rx_outstanding--;
-		}
-
-		if (len > 0) {
-			dev->status |= STATUS_READ_IN_PROGRESS;
-			dev->rx_buf_len = len;
-			dev->rx_buf = buf;
-			return;
-		} else
-			dev->status &= ~STATUS_READ_IN_PROGRESS;
-	}
-}
-
-static int i2c_dw_handle_tx_abort(struct dw_i2c_dev *dev)
-{
-	unsigned long abort_source = dev->abort_source;
-	int i;
-
-	if (abort_source & DW_IC_TX_ABRT_NOACK) {
-		for_each_set_bit(i, &abort_source, ARRAY_SIZE(abort_sources))
-			dev_dbg(dev->dev,
-				"%s: %s\n", __func__, abort_sources[i]);
-		return -EREMOTEIO;
-	}
-
-	for_each_set_bit(i, &abort_source, ARRAY_SIZE(abort_sources))
-		dev_err(dev->dev, "%s: %s\n", __func__, abort_sources[i]);
-
-	if (abort_source & DW_IC_TX_ARB_LOST)
-		return -EAGAIN;
-	else if (abort_source & DW_IC_TX_ABRT_GCALL_READ)
-		return -EINVAL; /* wrong msgs[] data */
-	else
-		return -EIO;
-}
-
-/*
- * Prepare controller for a transaction and call i2c_dw_xfer_msg
- */
-static int
-i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
-{
-	struct dw_i2c_dev *dev = i2c_get_adapdata(adap);
-	int ret;
-
-	dev_dbg(dev->dev, "%s: msgs: %d\n", __func__, num);
-
-	pm_runtime_get_sync(dev->dev);
-
-	reinit_completion(&dev->cmd_complete);
-	dev->msgs = msgs;
-	dev->msgs_num = num;
-	dev->cmd_err = 0;
-	dev->msg_write_idx = 0;
-	dev->msg_read_idx = 0;
-	dev->msg_err = 0;
-	dev->status = STATUS_IDLE;
-	dev->abort_source = 0;
-	dev->rx_outstanding = 0;
-
-	ret = i2c_dw_acquire_lock(dev);
-	if (ret)
-		goto done_nolock;
-
-	ret = i2c_dw_wait_bus_not_busy(dev);
-	if (ret < 0)
-		goto done;
-
-	/* start the transfers */
-	i2c_dw_xfer_init(dev);
-
-	/* wait for tx to complete */
-	if (!wait_for_completion_timeout(&dev->cmd_complete, adap->timeout)) {
-		dev_err(dev->dev, "controller timed out\n");
-		/* i2c_dw_init implicitly disables the adapter */
-		i2c_dw_init(dev);
-		ret = -ETIMEDOUT;
-		goto done;
-	}
-
-	/*
-	 * We must disable the adapter before returning and signaling the end
-	 * of the current transfer. Otherwise the hardware might continue
-	 * generating interrupts which in turn causes a race condition with
-	 * the following transfer.  Needs some more investigation if the
-	 * additional interrupts are a hardware bug or this driver doesn't
-	 * handle them correctly yet.
-	 */
-	__i2c_dw_enable(dev, false);
-
-	if (dev->msg_err) {
-		ret = dev->msg_err;
-		goto done;
-	}
-
-	/* no error */
-	if (likely(!dev->cmd_err && !dev->status)) {
-		ret = num;
-		goto done;
-	}
-
-	/* We have an error */
-	if (dev->cmd_err == DW_IC_ERR_TX_ABRT) {
-		ret = i2c_dw_handle_tx_abort(dev);
-		goto done;
-	}
-
-	if (dev->status)
-		dev_err(dev->dev,
-			"transfer terminated early - interrupt latency too high?\n");
-
-	ret = -EIO;
-
-done:
-	i2c_dw_release_lock(dev);
-
-done_nolock:
-	pm_runtime_mark_last_busy(dev->dev);
-	pm_runtime_put_autosuspend(dev->dev);
-
-	return ret;
-}
-
-static u32 i2c_dw_func(struct i2c_adapter *adap)
-{
-	struct dw_i2c_dev *dev = i2c_get_adapdata(adap);
-	return dev->functionality;
-}
-
-static const struct i2c_algorithm i2c_dw_algo = {
-	.master_xfer	= i2c_dw_xfer,
-	.functionality	= i2c_dw_func,
-};
-
-static u32 i2c_dw_read_clear_intrbits(struct dw_i2c_dev *dev)
-{
-	u32 stat;
-
-	/*
-	 * The IC_INTR_STAT register just indicates "enabled" interrupts.
-	 * Ths unmasked raw version of interrupt status bits are available
-	 * in the IC_RAW_INTR_STAT register.
-	 *
-	 * That is,
-	 *   stat = dw_readl(IC_INTR_STAT);
-	 * equals to,
-	 *   stat = dw_readl(IC_RAW_INTR_STAT) & dw_readl(IC_INTR_MASK);
-	 *
-	 * The raw version might be useful for debugging purposes.
-	 */
-	stat = dw_readl(dev, DW_IC_INTR_STAT);
-
-	/*
-	 * Do not use the IC_CLR_INTR register to clear interrupts, or
-	 * you'll miss some interrupts, triggered during the period from
-	 * dw_readl(IC_INTR_STAT) to dw_readl(IC_CLR_INTR).
-	 *
-	 * Instead, use the separately-prepared IC_CLR_* registers.
-	 */
-	if (stat & DW_IC_INTR_RX_UNDER)
-		dw_readl(dev, DW_IC_CLR_RX_UNDER);
-	if (stat & DW_IC_INTR_RX_OVER)
-		dw_readl(dev, DW_IC_CLR_RX_OVER);
-	if (stat & DW_IC_INTR_TX_OVER)
-		dw_readl(dev, DW_IC_CLR_TX_OVER);
-	if (stat & DW_IC_INTR_RD_REQ)
-		dw_readl(dev, DW_IC_CLR_RD_REQ);
-	if (stat & DW_IC_INTR_TX_ABRT) {
-		/*
-		 * The IC_TX_ABRT_SOURCE register is cleared whenever
-		 * the IC_CLR_TX_ABRT is read.  Preserve it beforehand.
-		 */
-		dev->abort_source = dw_readl(dev, DW_IC_TX_ABRT_SOURCE);
-		dw_readl(dev, DW_IC_CLR_TX_ABRT);
-	}
-	if (stat & DW_IC_INTR_RX_DONE)
-		dw_readl(dev, DW_IC_CLR_RX_DONE);
-	if (stat & DW_IC_INTR_ACTIVITY)
-		dw_readl(dev, DW_IC_CLR_ACTIVITY);
-	if (stat & DW_IC_INTR_STOP_DET)
-		dw_readl(dev, DW_IC_CLR_STOP_DET);
-	if (stat & DW_IC_INTR_START_DET)
-		dw_readl(dev, DW_IC_CLR_START_DET);
-	if (stat & DW_IC_INTR_GEN_CALL)
-		dw_readl(dev, DW_IC_CLR_GEN_CALL);
-
-	return stat;
-}
-
-/*
- * Interrupt service routine. This gets called whenever an I2C interrupt
- * occurs.
- */
-static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
-{
-	struct dw_i2c_dev *dev = dev_id;
-	u32 stat, enabled;
-
-	enabled = dw_readl(dev, DW_IC_ENABLE);
-	stat = dw_readl(dev, DW_IC_RAW_INTR_STAT);
-	dev_dbg(dev->dev, "%s: enabled=%#x stat=%#x\n", __func__, enabled, stat);
-	if (!enabled || !(stat & ~DW_IC_INTR_ACTIVITY))
-		return IRQ_NONE;
-
-	stat = i2c_dw_read_clear_intrbits(dev);
-
-	if (stat & DW_IC_INTR_TX_ABRT) {
-		dev->cmd_err |= DW_IC_ERR_TX_ABRT;
-		dev->status = STATUS_IDLE;
-
-		/*
-		 * Anytime TX_ABRT is set, the contents of the tx/rx
-		 * buffers are flushed.  Make sure to skip them.
-		 */
-		dw_writel(dev, 0, DW_IC_INTR_MASK);
-		goto tx_aborted;
-	}
-
-	if (stat & DW_IC_INTR_RX_FULL)
-		i2c_dw_read(dev);
-
-	if (stat & DW_IC_INTR_TX_EMPTY)
-		i2c_dw_xfer_msg(dev);
-
-	/*
-	 * No need to modify or disable the interrupt mask here.
-	 * i2c_dw_xfer_msg() will take care of it according to
-	 * the current transmit status.
-	 */
-
-tx_aborted:
-	if ((stat & (DW_IC_INTR_TX_ABRT | DW_IC_INTR_STOP_DET)) || dev->msg_err)
-		complete(&dev->cmd_complete);
-	else if (unlikely(dev->flags & ACCESS_INTR_MASK)) {
-		/* workaround to trigger pending interrupt */
-		stat = dw_readl(dev, DW_IC_INTR_MASK);
-		i2c_dw_disable_int(dev);
-		dw_writel(dev, stat, DW_IC_INTR_MASK);
-	}
-
-	return IRQ_HANDLED;
-}
-
-void i2c_dw_disable(struct dw_i2c_dev *dev)
-{
-	/* Disable controller */
-	__i2c_dw_enable_and_wait(dev, false);
-
-	/* Disable all interupts */
-	dw_writel(dev, 0, DW_IC_INTR_MASK);
-	dw_readl(dev, DW_IC_CLR_INTR);
-}
-EXPORT_SYMBOL_GPL(i2c_dw_disable);
-
-void i2c_dw_disable_int(struct dw_i2c_dev *dev)
-{
-	dw_writel(dev, 0, DW_IC_INTR_MASK);
-}
-EXPORT_SYMBOL_GPL(i2c_dw_disable_int);
-
-u32 i2c_dw_read_comp_param(struct dw_i2c_dev *dev)
-{
-	return dw_readl(dev, DW_IC_COMP_PARAM_1);
-}
-EXPORT_SYMBOL_GPL(i2c_dw_read_comp_param);
-
-int i2c_dw_probe(struct dw_i2c_dev *dev)
-{
-	struct i2c_adapter *adap = &dev->adapter;
-	unsigned long irq_flags;
-	int r;
-
-	init_completion(&dev->cmd_complete);
-
-	r = i2c_dw_init(dev);
-	if (r)
-		return r;
-
-	snprintf(adap->name, sizeof(adap->name),
-		 "Synopsys DesignWare I2C adapter");
-	adap->retries = 3;
-	adap->algo = &i2c_dw_algo;
-	adap->dev.parent = dev->dev;
-	i2c_set_adapdata(adap, dev);
-
-	if (dev->pm_disabled) {
-		dev_pm_syscore_device(dev->dev, true);
-		irq_flags = IRQF_NO_SUSPEND;
-	} else {
-		irq_flags = IRQF_SHARED | IRQF_COND_SUSPEND;
-	}
-
-	i2c_dw_disable_int(dev);
-	r = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr, irq_flags,
-			     dev_name(dev->dev), dev);
-	if (r) {
-		dev_err(dev->dev, "failure requesting irq %i: %d\n",
-			dev->irq, r);
-		return r;
-	}
-
-	/*
-	 * Increment PM usage count during adapter registration in order to
-	 * avoid possible spurious runtime suspend when adapter device is
-	 * registered to the device core and immediate resume in case bus has
-	 * registered I2C slaves that do I2C transfers in their probe.
-	 */
-	pm_runtime_get_noresume(dev->dev);
-	r = i2c_add_numbered_adapter(adap);
-	if (r)
-		dev_err(dev->dev, "failure adding adapter: %d\n", r);
-	pm_runtime_put_noidle(dev->dev);
-
-	return r;
-}
-EXPORT_SYMBOL_GPL(i2c_dw_probe);
-
-MODULE_DESCRIPTION("Synopsys DesignWare I2C bus adapter core");
-MODULE_LICENSE("GPL");

diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index a7cf429..9fee4c0 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h

@@ -1,5 +1,5 @@
 /*
- * Synopsys DesignWare I2C adapter driver (master only).
+ * Synopsys DesignWare I2C adapter driver.
  *
  * Based on the TI DAVINCI I2C adapter driver.
  *
@@ -37,9 +37,152 @@
 #define DW_IC_CON_SPEED_FAST		0x4
 #define DW_IC_CON_SPEED_HIGH		0x6
 #define DW_IC_CON_SPEED_MASK		0x6
+#define DW_IC_CON_10BITADDR_SLAVE		0x8
 #define DW_IC_CON_10BITADDR_MASTER	0x10
 #define DW_IC_CON_RESTART_EN		0x20
 #define DW_IC_CON_SLAVE_DISABLE		0x40
+#define DW_IC_CON_STOP_DET_IFADDRESSED		0x80
+#define DW_IC_CON_TX_EMPTY_CTRL		0x100
+#define DW_IC_CON_RX_FIFO_FULL_HLD_CTRL		0x200
+
+/*
+ * Register offsets
+ */
+#define DW_IC_CON		0x0
+#define DW_IC_TAR		0x4
+#define DW_IC_SAR		0x8
+#define DW_IC_DATA_CMD		0x10
+#define DW_IC_SS_SCL_HCNT	0x14
+#define DW_IC_SS_SCL_LCNT	0x18
+#define DW_IC_FS_SCL_HCNT	0x1c
+#define DW_IC_FS_SCL_LCNT	0x20
+#define DW_IC_HS_SCL_HCNT	0x24
+#define DW_IC_HS_SCL_LCNT	0x28
+#define DW_IC_INTR_STAT		0x2c
+#define DW_IC_INTR_MASK		0x30
+#define DW_IC_RAW_INTR_STAT	0x34
+#define DW_IC_RX_TL		0x38
+#define DW_IC_TX_TL		0x3c
+#define DW_IC_CLR_INTR		0x40
+#define DW_IC_CLR_RX_UNDER	0x44
+#define DW_IC_CLR_RX_OVER	0x48
+#define DW_IC_CLR_TX_OVER	0x4c
+#define DW_IC_CLR_RD_REQ	0x50
+#define DW_IC_CLR_TX_ABRT	0x54
+#define DW_IC_CLR_RX_DONE	0x58
+#define DW_IC_CLR_ACTIVITY	0x5c
+#define DW_IC_CLR_STOP_DET	0x60
+#define DW_IC_CLR_START_DET	0x64
+#define DW_IC_CLR_GEN_CALL	0x68
+#define DW_IC_ENABLE		0x6c
+#define DW_IC_STATUS		0x70
+#define DW_IC_TXFLR		0x74
+#define DW_IC_RXFLR		0x78
+#define DW_IC_SDA_HOLD		0x7c
+#define DW_IC_TX_ABRT_SOURCE	0x80
+#define DW_IC_ENABLE_STATUS	0x9c
+#define DW_IC_CLR_RESTART_DET	0xa8
+#define DW_IC_COMP_PARAM_1	0xf4
+#define DW_IC_COMP_VERSION	0xf8
+#define DW_IC_SDA_HOLD_MIN_VERS	0x3131312A
+#define DW_IC_COMP_TYPE		0xfc
+#define DW_IC_COMP_TYPE_VALUE	0x44570140
+
+#define DW_IC_INTR_RX_UNDER	0x001
+#define DW_IC_INTR_RX_OVER	0x002
+#define DW_IC_INTR_RX_FULL	0x004
+#define DW_IC_INTR_TX_OVER	0x008
+#define DW_IC_INTR_TX_EMPTY	0x010
+#define DW_IC_INTR_RD_REQ	0x020
+#define DW_IC_INTR_TX_ABRT	0x040
+#define DW_IC_INTR_RX_DONE	0x080
+#define DW_IC_INTR_ACTIVITY	0x100
+#define DW_IC_INTR_STOP_DET	0x200
+#define DW_IC_INTR_START_DET	0x400
+#define DW_IC_INTR_GEN_CALL	0x800
+#define DW_IC_INTR_RESTART_DET	0x1000
+
+#define DW_IC_INTR_DEFAULT_MASK		(DW_IC_INTR_RX_FULL | \
+					 DW_IC_INTR_TX_ABRT | \
+					 DW_IC_INTR_STOP_DET)
+#define DW_IC_INTR_MASTER_MASK		(DW_IC_INTR_DEFAULT_MASK | \
+					 DW_IC_INTR_TX_EMPTY)
+#define DW_IC_INTR_SLAVE_MASK		(DW_IC_INTR_DEFAULT_MASK | \
+					 DW_IC_INTR_RX_DONE | \
+					 DW_IC_INTR_RX_UNDER | \
+					 DW_IC_INTR_RD_REQ)
+
+#define DW_IC_STATUS_ACTIVITY		0x1
+#define DW_IC_STATUS_TFE		BIT(2)
+#define DW_IC_STATUS_MASTER_ACTIVITY	BIT(5)
+#define DW_IC_STATUS_SLAVE_ACTIVITY	BIT(6)
+
+#define DW_IC_SDA_HOLD_RX_SHIFT		16
+#define DW_IC_SDA_HOLD_RX_MASK		GENMASK(23, DW_IC_SDA_HOLD_RX_SHIFT)
+
+#define DW_IC_ERR_TX_ABRT	0x1
+
+#define DW_IC_TAR_10BITADDR_MASTER BIT(12)
+
+#define DW_IC_COMP_PARAM_1_SPEED_MODE_HIGH	(BIT(2) | BIT(3))
+#define DW_IC_COMP_PARAM_1_SPEED_MODE_MASK	GENMASK(3, 2)
+
+/*
+ * status codes
+ */
+#define STATUS_IDLE			0x0
+#define STATUS_WRITE_IN_PROGRESS	0x1
+#define STATUS_READ_IN_PROGRESS		0x2
+
+#define TIMEOUT			20 /* ms */
+
+/*
+ * operation modes
+ */
+#define DW_IC_MASTER		0
+#define DW_IC_SLAVE		1
+
+/*
+ * Hardware abort codes from the DW_IC_TX_ABRT_SOURCE register
+ *
+ * Only expected abort codes are listed here
+ * refer to the datasheet for the full list
+ */
+#define ABRT_7B_ADDR_NOACK	0
+#define ABRT_10ADDR1_NOACK	1
+#define ABRT_10ADDR2_NOACK	2
+#define ABRT_TXDATA_NOACK	3
+#define ABRT_GCALL_NOACK	4
+#define ABRT_GCALL_READ		5
+#define ABRT_SBYTE_ACKDET	7
+#define ABRT_SBYTE_NORSTRT	9
+#define ABRT_10B_RD_NORSTRT	10
+#define ABRT_MASTER_DIS		11
+#define ARB_LOST		12
+#define ABRT_SLAVE_FLUSH_TXFIFO	13
+#define ABRT_SLAVE_ARBLOST	14
+#define ABRT_SLAVE_RD_INTX	15
+
+#define DW_IC_TX_ABRT_7B_ADDR_NOACK	(1UL << ABRT_7B_ADDR_NOACK)
+#define DW_IC_TX_ABRT_10ADDR1_NOACK	(1UL << ABRT_10ADDR1_NOACK)
+#define DW_IC_TX_ABRT_10ADDR2_NOACK	(1UL << ABRT_10ADDR2_NOACK)
+#define DW_IC_TX_ABRT_TXDATA_NOACK	(1UL << ABRT_TXDATA_NOACK)
+#define DW_IC_TX_ABRT_GCALL_NOACK	(1UL << ABRT_GCALL_NOACK)
+#define DW_IC_TX_ABRT_GCALL_READ	(1UL << ABRT_GCALL_READ)
+#define DW_IC_TX_ABRT_SBYTE_ACKDET	(1UL << ABRT_SBYTE_ACKDET)
+#define DW_IC_TX_ABRT_SBYTE_NORSTRT	(1UL << ABRT_SBYTE_NORSTRT)
+#define DW_IC_TX_ABRT_10B_RD_NORSTRT	(1UL << ABRT_10B_RD_NORSTRT)
+#define DW_IC_TX_ABRT_MASTER_DIS	(1UL << ABRT_MASTER_DIS)
+#define DW_IC_TX_ARB_LOST		(1UL << ARB_LOST)
+#define DW_IC_RX_ABRT_SLAVE_RD_INTX	(1UL << ABRT_SLAVE_RD_INTX)
+#define DW_IC_RX_ABRT_SLAVE_ARBLOST	(1UL << ABRT_SLAVE_ARBLOST)
+#define DW_IC_RX_ABRT_SLAVE_FLUSH_TXFIFO	(1UL << ABRT_SLAVE_FLUSH_TXFIFO)
+
+#define DW_IC_TX_ABRT_NOACK		(DW_IC_TX_ABRT_7B_ADDR_NOACK | \
+					 DW_IC_TX_ABRT_10ADDR1_NOACK | \
+					 DW_IC_TX_ABRT_10ADDR2_NOACK | \
+					 DW_IC_TX_ABRT_TXDATA_NOACK | \
+					 DW_IC_TX_ABRT_GCALL_NOACK)
 
 
 /**
@@ -48,8 +191,9 @@
  * @base: IO registers pointer
  * @cmd_complete: tx completion indicator
  * @clk: input reference clock
+ * @slave: represents an I2C slave device
  * @cmd_err: run time hadware error code
- * @msgs: points to an array of messages currently being transfered
+ * @msgs: points to an array of messages currently being transferred
  * @msgs_num: the number of elements in msgs
  * @msg_write_idx: the element index of the current tx message in the msgs
  *	array
@@ -64,6 +208,7 @@
  * @abort_source: copy of the TX_ABRT_SOURCE register
  * @irq: interrupt number for the i2c master
  * @adapter: i2c subsystem adapter node
+ * @slave_cfg: configuration for the slave device
  * @tx_fifo_depth: depth of the hardware tx fifo
  * @rx_fifo_depth: depth of the hardware rx fifo
  * @rx_outstanding: current master-rx elements in tx fifo
@@ -80,6 +225,10 @@
  * @acquire_lock: function to acquire a hardware lock on the bus
  * @release_lock: function to release a hardware lock on the bus
  * @pm_disabled: true if power-management should be disabled for this i2c-bus
+ * @disable: function to disable the controller
+ * @disable_int: function to disable all interrupts
+ * @init: function to initialize the I2C hardware
+ * @mode: operation mode - DW_IC_MASTER or DW_IC_SLAVE
  *
  * HCNT and LCNT parameters can be used if the platform knows more accurate
  * values than the one computed based only on the input clock frequency.
@@ -91,6 +240,7 @@ struct dw_i2c_dev {
 	struct completion	cmd_complete;
 	struct clk		*clk;
 	struct reset_control	*rst;
+	struct i2c_client		*slave;
 	u32			(*get_clk_rate_khz) (struct dw_i2c_dev *dev);
 	struct dw_pci_controller *controller;
 	int			cmd_err;
@@ -110,6 +260,7 @@ struct dw_i2c_dev {
 	struct i2c_adapter	adapter;
 	u32			functionality;
 	u32			master_cfg;
+	u32			slave_cfg;
 	unsigned int		tx_fifo_depth;
 	unsigned int		rx_fifo_depth;
 	int			rx_outstanding;
@@ -129,6 +280,10 @@ struct dw_i2c_dev {
 	int			(*acquire_lock)(struct dw_i2c_dev *dev);
 	void			(*release_lock)(struct dw_i2c_dev *dev);
 	bool			pm_disabled;
+	void			(*disable)(struct dw_i2c_dev *dev);
+	void			(*disable_int)(struct dw_i2c_dev *dev);
+	int			(*init)(struct dw_i2c_dev *dev);
+	int			mode;
 };
 
 #define ACCESS_SWAP		0x00000001
@@ -137,11 +292,28 @@ struct dw_i2c_dev {
 
 #define MODEL_CHERRYTRAIL	0x00000100
 
-extern int i2c_dw_init(struct dw_i2c_dev *dev);
-extern void i2c_dw_disable(struct dw_i2c_dev *dev);
-extern void i2c_dw_disable_int(struct dw_i2c_dev *dev);
+u32 dw_readl(struct dw_i2c_dev *dev, int offset);
+void dw_writel(struct dw_i2c_dev *dev, u32 b, int offset);
+u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset);
+u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset);
+void __i2c_dw_enable(struct dw_i2c_dev *dev, bool enable);
+void __i2c_dw_enable_and_wait(struct dw_i2c_dev *dev, bool enable);
+unsigned long i2c_dw_clk_rate(struct dw_i2c_dev *dev);
+int i2c_dw_acquire_lock(struct dw_i2c_dev *dev);
+void i2c_dw_release_lock(struct dw_i2c_dev *dev);
+int i2c_dw_wait_bus_not_busy(struct dw_i2c_dev *dev);
+int i2c_dw_handle_tx_abort(struct dw_i2c_dev *dev);
+u32 i2c_dw_func(struct i2c_adapter *adap);
+void i2c_dw_disable(struct dw_i2c_dev *dev);
+void i2c_dw_disable_int(struct dw_i2c_dev *dev);
+
 extern u32 i2c_dw_read_comp_param(struct dw_i2c_dev *dev);
 extern int i2c_dw_probe(struct dw_i2c_dev *dev);
+#if IS_ENABLED(CONFIG_I2C_DESIGNWARE_SLAVE)
+extern int i2c_dw_probe_slave(struct dw_i2c_dev *dev);
+#else
+static inline int i2c_dw_probe_slave(struct dw_i2c_dev *dev) { return -EINVAL; }
+#endif
 
 #if IS_ENABLED(CONFIG_I2C_DESIGNWARE_BAYTRAIL)
 extern int i2c_dw_probe_lock_support(struct dw_i2c_dev *dev);

diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
new file mode 100644
index 0000000..418c233
--- /dev/null
+++ b/drivers/i2c/busses/i2c-designware-master.c

@@ -0,0 +1,672 @@
+/*
+ * Synopsys DesignWare I2C adapter driver (master only).
+ *
+ * Based on the TI DAVINCI I2C adapter driver.
+ *
+ * Copyright (C) 2006 Texas Instruments.
+ * Copyright (C) 2007 MontaVista Software Inc.
+ * Copyright (C) 2009 Provigent Ltd.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * ----------------------------------------------------------------------------
+ *
+ */
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
+
+#include "i2c-designware-core.h"
+
+static void i2c_dw_configure_fifo_master(struct dw_i2c_dev *dev)
+{
+	/* Configure Tx/Rx FIFO threshold levels */
+	dw_writel(dev, dev->tx_fifo_depth / 2, DW_IC_TX_TL);
+	dw_writel(dev, 0, DW_IC_RX_TL);
+
+	/* Configure the I2C master */
+	dw_writel(dev, dev->master_cfg, DW_IC_CON);
+}
+
+/**
+ * i2c_dw_init_master() - Initialize the designware I2C master hardware
+ * @dev: device private data
+ *
+ * This function configures and enables the I2C master.
+ * This function is called during I2C init function, and in case of timeout at
+ * run time.
+ */
+static int i2c_dw_init_master(struct dw_i2c_dev *dev)
+{
+	u32 hcnt, lcnt;
+	u32 reg, comp_param1;
+	u32 sda_falling_time, scl_falling_time;
+	int ret;
+
+	ret = i2c_dw_acquire_lock(dev);
+	if (ret)
+		return ret;
+
+	reg = dw_readl(dev, DW_IC_COMP_TYPE);
+	if (reg == ___constant_swab32(DW_IC_COMP_TYPE_VALUE)) {
+		/* Configure register endianness access */
+		dev->flags |= ACCESS_SWAP;
+	} else if (reg == (DW_IC_COMP_TYPE_VALUE & 0x0000ffff)) {
+		/* Configure register access mode 16bit */
+		dev->flags |= ACCESS_16BIT;
+	} else if (reg != DW_IC_COMP_TYPE_VALUE) {
+		dev_err(dev->dev,
+			"Unknown Synopsys component type: 0x%08x\n", reg);
+		i2c_dw_release_lock(dev);
+		return -ENODEV;
+	}
+
+	comp_param1 = dw_readl(dev, DW_IC_COMP_PARAM_1);
+
+	/* Disable the adapter */
+	__i2c_dw_enable_and_wait(dev, false);
+
+	/* Set standard and fast speed dividers for high/low periods */
+
+	sda_falling_time = dev->sda_falling_time ?: 300; /* ns */
+	scl_falling_time = dev->scl_falling_time ?: 300; /* ns */
+
+	/* Set SCL timing parameters for standard-mode */
+	if (dev->ss_hcnt && dev->ss_lcnt) {
+		hcnt = dev->ss_hcnt;
+		lcnt = dev->ss_lcnt;
+	} else {
+		hcnt = i2c_dw_scl_hcnt(i2c_dw_clk_rate(dev),
+					4000,	/* tHD;STA = tHIGH = 4.0 us */
+					sda_falling_time,
+					0,	/* 0: DW default, 1: Ideal */
+					0);	/* No offset */
+		lcnt = i2c_dw_scl_lcnt(i2c_dw_clk_rate(dev),
+					4700,	/* tLOW = 4.7 us */
+					scl_falling_time,
+					0);	/* No offset */
+	}
+	dw_writel(dev, hcnt, DW_IC_SS_SCL_HCNT);
+	dw_writel(dev, lcnt, DW_IC_SS_SCL_LCNT);
+	dev_dbg(dev->dev, "Standard-mode HCNT:LCNT = %d:%d\n", hcnt, lcnt);
+
+	/* Set SCL timing parameters for fast-mode or fast-mode plus */
+	if ((dev->clk_freq == 1000000) && dev->fp_hcnt && dev->fp_lcnt) {
+		hcnt = dev->fp_hcnt;
+		lcnt = dev->fp_lcnt;
+	} else if (dev->fs_hcnt && dev->fs_lcnt) {
+		hcnt = dev->fs_hcnt;
+		lcnt = dev->fs_lcnt;
+	} else {
+		hcnt = i2c_dw_scl_hcnt(i2c_dw_clk_rate(dev),
+					600,	/* tHD;STA = tHIGH = 0.6 us */
+					sda_falling_time,
+					0,	/* 0: DW default, 1: Ideal */
+					0);	/* No offset */
+		lcnt = i2c_dw_scl_lcnt(i2c_dw_clk_rate(dev),
+					1300,	/* tLOW = 1.3 us */
+					scl_falling_time,
+					0);	/* No offset */
+	}
+	dw_writel(dev, hcnt, DW_IC_FS_SCL_HCNT);
+	dw_writel(dev, lcnt, DW_IC_FS_SCL_LCNT);
+	dev_dbg(dev->dev, "Fast-mode HCNT:LCNT = %d:%d\n", hcnt, lcnt);
+
+	if ((dev->master_cfg & DW_IC_CON_SPEED_MASK) ==
+		DW_IC_CON_SPEED_HIGH) {
+		if ((comp_param1 & DW_IC_COMP_PARAM_1_SPEED_MODE_MASK)
+			!= DW_IC_COMP_PARAM_1_SPEED_MODE_HIGH) {
+			dev_err(dev->dev, "High Speed not supported!\n");
+			dev->master_cfg &= ~DW_IC_CON_SPEED_MASK;
+			dev->master_cfg |= DW_IC_CON_SPEED_FAST;
+		} else if (dev->hs_hcnt && dev->hs_lcnt) {
+			hcnt = dev->hs_hcnt;
+			lcnt = dev->hs_lcnt;
+			dw_writel(dev, hcnt, DW_IC_HS_SCL_HCNT);
+			dw_writel(dev, lcnt, DW_IC_HS_SCL_LCNT);
+			dev_dbg(dev->dev, "HighSpeed-mode HCNT:LCNT = %d:%d\n",
+				hcnt, lcnt);
+		}
+	}
+
+	/* Configure SDA Hold Time if required */
+	reg = dw_readl(dev, DW_IC_COMP_VERSION);
+	if (reg >= DW_IC_SDA_HOLD_MIN_VERS) {
+		if (!dev->sda_hold_time) {
+			/* Keep previous hold time setting if no one set it */
+			dev->sda_hold_time = dw_readl(dev, DW_IC_SDA_HOLD);
+		}
+		/*
+		 * Workaround for avoiding TX arbitration lost in case I2C
+		 * slave pulls SDA down "too quickly" after falling edge of
+		 * SCL by enabling non-zero SDA RX hold. Specification says it
+		 * extends incoming SDA low to high transition while SCL is
+		 * high but it appears to also help with the above issue.
+		 */
+		if (!(dev->sda_hold_time & DW_IC_SDA_HOLD_RX_MASK))
+			dev->sda_hold_time |= 1 << DW_IC_SDA_HOLD_RX_SHIFT;
+		dw_writel(dev, dev->sda_hold_time, DW_IC_SDA_HOLD);
+	} else {
+		dev_warn(dev->dev,
+			"Hardware too old to adjust SDA hold time.\n");
+	}
+
+	i2c_dw_configure_fifo_master(dev);
+	i2c_dw_release_lock(dev);
+
+	return 0;
+}
+
+static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
+{
+	struct i2c_msg *msgs = dev->msgs;
+	u32 ic_con, ic_tar = 0;
+
+	/* Disable the adapter */
+	__i2c_dw_enable_and_wait(dev, false);
+
+	/* If the slave address is ten bit address, enable 10BITADDR */
+	ic_con = dw_readl(dev, DW_IC_CON);
+	if (msgs[dev->msg_write_idx].flags & I2C_M_TEN) {
+		ic_con |= DW_IC_CON_10BITADDR_MASTER;
+		/*
+		 * If I2C_DYNAMIC_TAR_UPDATE is set, the 10-bit addressing
+		 * mode has to be enabled via bit 12 of IC_TAR register.
+		 * We set it always as I2C_DYNAMIC_TAR_UPDATE can't be
+		 * detected from registers.
+		 */
+		ic_tar = DW_IC_TAR_10BITADDR_MASTER;
+	} else {
+		ic_con &= ~DW_IC_CON_10BITADDR_MASTER;
+	}
+
+	dw_writel(dev, ic_con, DW_IC_CON);
+
+	/*
+	 * Set the slave (target) address and enable 10-bit addressing mode
+	 * if applicable.
+	 */
+	dw_writel(dev, msgs[dev->msg_write_idx].addr | ic_tar, DW_IC_TAR);
+
+	/* Enforce disabled interrupts (due to HW issues) */
+	i2c_dw_disable_int(dev);
+
+	/* Enable the adapter */
+	__i2c_dw_enable(dev, true);
+
+	/* Clear and enable interrupts */
+	dw_readl(dev, DW_IC_CLR_INTR);
+	dw_writel(dev, DW_IC_INTR_MASTER_MASK, DW_IC_INTR_MASK);
+}
+
+/*
+ * Initiate (and continue) low level master read/write transaction.
+ * This function is only called from i2c_dw_irq_handler_master() and pumps
+ * i2c_msg messages into the tx buffer.  Even if the size of i2c_msg data is
+ * longer than the size of the tx buffer, it handles everything.
+ */
+static void
+i2c_dw_xfer_msg(struct dw_i2c_dev *dev)
+{
+	struct i2c_msg *msgs = dev->msgs;
+	u32 intr_mask;
+	int tx_limit, rx_limit;
+	u32 addr = msgs[dev->msg_write_idx].addr;
+	u32 buf_len = dev->tx_buf_len;
+	u8 *buf = dev->tx_buf;
+	bool need_restart = false;
+
+	intr_mask = DW_IC_INTR_MASTER_MASK;
+
+	for (; dev->msg_write_idx < dev->msgs_num; dev->msg_write_idx++) {
+		u32 flags = msgs[dev->msg_write_idx].flags;
+
+		/*
+		 * If target address has changed, we need to
+		 * reprogram the target address in the I2C
+		 * adapter when we are done with this transfer.
+		 */
+		if (msgs[dev->msg_write_idx].addr != addr) {
+			dev_err(dev->dev,
+				"%s: invalid target address\n", __func__);
+			dev->msg_err = -EINVAL;
+			break;
+		}
+
+		if (msgs[dev->msg_write_idx].len == 0) {
+			dev_err(dev->dev,
+				"%s: invalid message length\n", __func__);
+			dev->msg_err = -EINVAL;
+			break;
+		}
+
+		if (!(dev->status & STATUS_WRITE_IN_PROGRESS)) {
+			/* new i2c_msg */
+			buf = msgs[dev->msg_write_idx].buf;
+			buf_len = msgs[dev->msg_write_idx].len;
+
+			/* If both IC_EMPTYFIFO_HOLD_MASTER_EN and
+			 * IC_RESTART_EN are set, we must manually
+			 * set restart bit between messages.
+			 */
+			if ((dev->master_cfg & DW_IC_CON_RESTART_EN) &&
+					(dev->msg_write_idx > 0))
+				need_restart = true;
+		}
+
+		tx_limit = dev->tx_fifo_depth - dw_readl(dev, DW_IC_TXFLR);
+		rx_limit = dev->rx_fifo_depth - dw_readl(dev, DW_IC_RXFLR);
+
+		while (buf_len > 0 && tx_limit > 0 && rx_limit > 0) {
+			u32 cmd = 0;
+
+			/*
+			 * If IC_EMPTYFIFO_HOLD_MASTER_EN is set we must
+			 * manually set the stop bit. However, it cannot be
+			 * detected from the registers so we set it always
+			 * when writing/reading the last byte.
+			 */
+
+			/*
+			 * i2c-core always sets the buffer length of
+			 * I2C_FUNC_SMBUS_BLOCK_DATA to 1. The length will
+			 * be adjusted when receiving the first byte.
+			 * Thus we can't stop the transaction here.
+			 */
+			if (dev->msg_write_idx == dev->msgs_num - 1 &&
+			    buf_len == 1 && !(flags & I2C_M_RECV_LEN))
+				cmd |= BIT(9);
+
+			if (need_restart) {
+				cmd |= BIT(10);
+				need_restart = false;
+			}
+
+			if (msgs[dev->msg_write_idx].flags & I2C_M_RD) {
+
+				/* Avoid rx buffer overrun */
+				if (dev->rx_outstanding >= dev->rx_fifo_depth)
+					break;
+
+				dw_writel(dev, cmd | 0x100, DW_IC_DATA_CMD);
+				rx_limit--;
+				dev->rx_outstanding++;
+			} else
+				dw_writel(dev, cmd | *buf++, DW_IC_DATA_CMD);
+			tx_limit--; buf_len--;
+		}
+
+		dev->tx_buf = buf;
+		dev->tx_buf_len = buf_len;
+
+		/*
+		 * Because we don't know the buffer length in the
+		 * I2C_FUNC_SMBUS_BLOCK_DATA case, we can't stop
+		 * the transaction here.
+		 */
+		if (buf_len > 0 || flags & I2C_M_RECV_LEN) {
+			/* more bytes to be written */
+			dev->status |= STATUS_WRITE_IN_PROGRESS;
+			break;
+		} else
+			dev->status &= ~STATUS_WRITE_IN_PROGRESS;
+	}
+
+	/*
+	 * If i2c_msg index search is completed, we don't need TX_EMPTY
+	 * interrupt any more.
+	 */
+	if (dev->msg_write_idx == dev->msgs_num)
+		intr_mask &= ~DW_IC_INTR_TX_EMPTY;
+
+	if (dev->msg_err)
+		intr_mask = 0;
+
+	dw_writel(dev, intr_mask,  DW_IC_INTR_MASK);
+}
+
+static u8
+i2c_dw_recv_len(struct dw_i2c_dev *dev, u8 len)
+{
+	struct i2c_msg *msgs = dev->msgs;
+	u32 flags = msgs[dev->msg_read_idx].flags;
+
+	/*
+	 * Adjust the buffer length and mask the flag
+	 * after receiving the first byte.
+	 */
+	len += (flags & I2C_CLIENT_PEC) ? 2 : 1;
+	dev->tx_buf_len = len - min_t(u8, len, dev->rx_outstanding);
+	msgs[dev->msg_read_idx].len = len;
+	msgs[dev->msg_read_idx].flags &= ~I2C_M_RECV_LEN;
+
+	return len;
+}
+
+static void
+i2c_dw_read(struct dw_i2c_dev *dev)
+{
+	struct i2c_msg *msgs = dev->msgs;
+	int rx_valid;
+
+	for (; dev->msg_read_idx < dev->msgs_num; dev->msg_read_idx++) {
+		u32 len;
+		u8 *buf;
+
+		if (!(msgs[dev->msg_read_idx].flags & I2C_M_RD))
+			continue;
+
+		if (!(dev->status & STATUS_READ_IN_PROGRESS)) {
+			len = msgs[dev->msg_read_idx].len;
+			buf = msgs[dev->msg_read_idx].buf;
+		} else {
+			len = dev->rx_buf_len;
+			buf = dev->rx_buf;
+		}
+
+		rx_valid = dw_readl(dev, DW_IC_RXFLR);
+
+		for (; len > 0 && rx_valid > 0; len--, rx_valid--) {
+			u32 flags = msgs[dev->msg_read_idx].flags;
+
+			*buf = dw_readl(dev, DW_IC_DATA_CMD);
+			/* Ensure length byte is a valid value */
+			if (flags & I2C_M_RECV_LEN &&
+				*buf <= I2C_SMBUS_BLOCK_MAX && *buf > 0) {
+				len = i2c_dw_recv_len(dev, *buf);
+			}
+			buf++;
+			dev->rx_outstanding--;
+		}
+
+		if (len > 0) {
+			dev->status |= STATUS_READ_IN_PROGRESS;
+			dev->rx_buf_len = len;
+			dev->rx_buf = buf;
+			return;
+		} else
+			dev->status &= ~STATUS_READ_IN_PROGRESS;
+	}
+}
+
+/*
+ * Prepare controller for a transaction and start it via i2c_dw_xfer_init().
+ */
+static int
+i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
+{
+	struct dw_i2c_dev *dev = i2c_get_adapdata(adap);
+	int ret;
+
+	dev_dbg(dev->dev, "%s: msgs: %d\n", __func__, num);
+
+	pm_runtime_get_sync(dev->dev);
+
+	reinit_completion(&dev->cmd_complete);
+	dev->msgs = msgs;
+	dev->msgs_num = num;
+	dev->cmd_err = 0;
+	dev->msg_write_idx = 0;
+	dev->msg_read_idx = 0;
+	dev->msg_err = 0;
+	dev->status = STATUS_IDLE;
+	dev->abort_source = 0;
+	dev->rx_outstanding = 0;
+
+	ret = i2c_dw_acquire_lock(dev);
+	if (ret)
+		goto done_nolock;
+
+	ret = i2c_dw_wait_bus_not_busy(dev);
+	if (ret < 0)
+		goto done;
+
+	/* Start the transfers */
+	i2c_dw_xfer_init(dev);
+
+	/* Wait for tx to complete */
+	if (!wait_for_completion_timeout(&dev->cmd_complete, adap->timeout)) {
+		dev_err(dev->dev, "controller timed out\n");
+		/* i2c_dw_init_master() implicitly disables the adapter */
+		i2c_dw_init_master(dev);
+		ret = -ETIMEDOUT;
+		goto done;
+	}
+
+	/*
+	 * We must disable the adapter before returning and signaling the end
+	 * of the current transfer. Otherwise the hardware might continue
+	 * generating interrupts which in turn causes a race condition with
+	 * the following transfer.  Needs some more investigation if the
+	 * additional interrupts are a hardware bug or this driver doesn't
+	 * handle them correctly yet.
+	 */
+	__i2c_dw_enable(dev, false);
+
+	if (dev->msg_err) {
+		ret = dev->msg_err;
+		goto done;
+	}
+
+	/* No error */
+	if (likely(!dev->cmd_err && !dev->status)) {
+		ret = num;
+		goto done;
+	}
+
+	/* We have an error */
+	if (dev->cmd_err == DW_IC_ERR_TX_ABRT) {
+		ret = i2c_dw_handle_tx_abort(dev);
+		goto done;
+	}
+
+	if (dev->status)
+		dev_err(dev->dev,
+			"transfer terminated early - interrupt latency too high?\n");
+
+	ret = -EIO;
+
+done:
+	i2c_dw_release_lock(dev);
+
+done_nolock:
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
+	return ret;
+}
+
+static const struct i2c_algorithm i2c_dw_algo = {
+	.master_xfer = i2c_dw_xfer,
+	.functionality = i2c_dw_func,
+};
+
+static u32 i2c_dw_read_clear_intrbits(struct dw_i2c_dev *dev)
+{
+	u32 stat;
+
+	/*
+	 * The IC_INTR_STAT register just indicates "enabled" interrupts.
+	 * The unmasked raw versions of the interrupt status bits are available
+	 * in the IC_RAW_INTR_STAT register.
+	 *
+	 * That is,
+	 *   stat = dw_readl(IC_INTR_STAT);
+	 * equals to,
+	 *   stat = dw_readl(IC_RAW_INTR_STAT) & dw_readl(IC_INTR_MASK);
+	 *
+	 * The raw version might be useful for debugging purposes.
+	 */
+	stat = dw_readl(dev, DW_IC_INTR_STAT);
+
+	/*
+	 * Do not use the IC_CLR_INTR register to clear interrupts, or
+	 * you'll miss some interrupts, triggered during the period from
+	 * dw_readl(IC_INTR_STAT) to dw_readl(IC_CLR_INTR).
+	 *
+	 * Instead, use the separately-prepared IC_CLR_* registers.
+	 */
+	if (stat & DW_IC_INTR_RX_UNDER)
+		dw_readl(dev, DW_IC_CLR_RX_UNDER);
+	if (stat & DW_IC_INTR_RX_OVER)
+		dw_readl(dev, DW_IC_CLR_RX_OVER);
+	if (stat & DW_IC_INTR_TX_OVER)
+		dw_readl(dev, DW_IC_CLR_TX_OVER);
+	if (stat & DW_IC_INTR_RD_REQ)
+		dw_readl(dev, DW_IC_CLR_RD_REQ);
+	if (stat & DW_IC_INTR_TX_ABRT) {
+		/*
+		 * The IC_TX_ABRT_SOURCE register is cleared whenever
+		 * the IC_CLR_TX_ABRT is read.  Preserve it beforehand.
+		 */
+		dev->abort_source = dw_readl(dev, DW_IC_TX_ABRT_SOURCE);
+		dw_readl(dev, DW_IC_CLR_TX_ABRT);
+	}
+	if (stat & DW_IC_INTR_RX_DONE)
+		dw_readl(dev, DW_IC_CLR_RX_DONE);
+	if (stat & DW_IC_INTR_ACTIVITY)
+		dw_readl(dev, DW_IC_CLR_ACTIVITY);
+	if (stat & DW_IC_INTR_STOP_DET)
+		dw_readl(dev, DW_IC_CLR_STOP_DET);
+	if (stat & DW_IC_INTR_START_DET)
+		dw_readl(dev, DW_IC_CLR_START_DET);
+	if (stat & DW_IC_INTR_GEN_CALL)
+		dw_readl(dev, DW_IC_CLR_GEN_CALL);
+
+	return stat;
+}
+
+/*
+ * Interrupt service routine. This gets called whenever an I2C master interrupt
+ * occurs.
+ */
+static int i2c_dw_irq_handler_master(struct dw_i2c_dev *dev)
+{
+	u32 stat;
+
+	stat = i2c_dw_read_clear_intrbits(dev);
+	if (stat & DW_IC_INTR_TX_ABRT) {
+		dev->cmd_err |= DW_IC_ERR_TX_ABRT;
+		dev->status = STATUS_IDLE;
+
+		/*
+		 * Anytime TX_ABRT is set, the contents of the tx/rx
+		 * buffers are flushed. Make sure to skip them.
+		 */
+		dw_writel(dev, 0, DW_IC_INTR_MASK);
+		goto tx_aborted;
+	}
+
+	if (stat & DW_IC_INTR_RX_FULL)
+		i2c_dw_read(dev);
+
+	if (stat & DW_IC_INTR_TX_EMPTY)
+		i2c_dw_xfer_msg(dev);
+
+	/*
+	 * No need to modify or disable the interrupt mask here.
+	 * i2c_dw_xfer_msg() will take care of it according to
+	 * the current transmit status.
+	 */
+
+tx_aborted:
+	if ((stat & (DW_IC_INTR_TX_ABRT | DW_IC_INTR_STOP_DET)) || dev->msg_err)
+		complete(&dev->cmd_complete);
+	else if (unlikely(dev->flags & ACCESS_INTR_MASK)) {
+		/* Workaround to trigger pending interrupt */
+		stat = dw_readl(dev, DW_IC_INTR_MASK);
+		i2c_dw_disable_int(dev);
+		dw_writel(dev, stat, DW_IC_INTR_MASK);
+	}
+
+	return 0;
+}
+
+static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
+{
+	struct dw_i2c_dev *dev = dev_id;
+	u32 stat, enabled;
+
+	enabled = dw_readl(dev, DW_IC_ENABLE);
+	stat = dw_readl(dev, DW_IC_RAW_INTR_STAT);
+	dev_dbg(dev->dev, "enabled=%#x stat=%#x\n", enabled, stat);
+	if (!enabled || !(stat & ~DW_IC_INTR_ACTIVITY))
+		return IRQ_NONE;
+
+	i2c_dw_irq_handler_master(dev);
+
+	return IRQ_HANDLED;
+}
+
+int i2c_dw_probe(struct dw_i2c_dev *dev)
+{
+	struct i2c_adapter *adap = &dev->adapter;
+	unsigned long irq_flags;
+	int ret;
+
+	init_completion(&dev->cmd_complete);
+
+	dev->init = i2c_dw_init_master;
+	dev->disable = i2c_dw_disable;
+	dev->disable_int = i2c_dw_disable_int;
+
+	ret = dev->init(dev);
+	if (ret)
+		return ret;
+
+	snprintf(adap->name, sizeof(adap->name),
+		 "Synopsys DesignWare I2C adapter");
+	adap->retries = 3;
+	adap->algo = &i2c_dw_algo;
+	adap->dev.parent = dev->dev;
+	i2c_set_adapdata(adap, dev);
+
+	if (dev->pm_disabled) {
+		dev_pm_syscore_device(dev->dev, true);
+		irq_flags = IRQF_NO_SUSPEND;
+	} else {
+		irq_flags = IRQF_SHARED | IRQF_COND_SUSPEND;
+	}
+
+	i2c_dw_disable_int(dev);
+	ret = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr, irq_flags,
+			       dev_name(dev->dev), dev);
+	if (ret) {
+		dev_err(dev->dev, "failure requesting irq %i: %d\n",
+			dev->irq, ret);
+		return ret;
+	}
+
+	/*
+	 * Increment PM usage count during adapter registration in order to
+	 * avoid possible spurious runtime suspend when adapter device is
+	 * registered to the device core and immediate resume in case bus has
+	 * registered I2C slaves that do I2C transfers in their probe.
+	 */
+	pm_runtime_get_noresume(dev->dev);
+	ret = i2c_add_numbered_adapter(adap);
+	if (ret)
+		dev_err(dev->dev, "failure adding adapter: %d\n", ret);
+	pm_runtime_put_noidle(dev->dev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i2c_dw_probe);
+
+MODULE_DESCRIPTION("Synopsys DesignWare I2C bus master adapter");
+MODULE_LICENSE("GPL");

diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c
index ed485b6..86e1bd0 100644
--- a/drivers/i2c/busses/i2c-designware-pcidrv.c
+++ b/drivers/i2c/busses/i2c-designware-pcidrv.c

@@ -187,16 +187,19 @@ static struct dw_pci_controller dw_pci_controllers[] = {
 static int i2c_dw_pci_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
+	struct dw_i2c_dev *i_dev = pci_get_drvdata(pdev);
 
-	i2c_dw_disable(pci_get_drvdata(pdev));
+	i_dev->disable(i_dev);
+
 	return 0;
 }
 
 static int i2c_dw_pci_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
+	struct dw_i2c_dev *i_dev = pci_get_drvdata(pdev);
 
-	return i2c_dw_init(pci_get_drvdata(pdev));
+	return i_dev->init(i_dev);
 }
 #endif
 
@@ -296,7 +299,7 @@ static void i2c_dw_pci_remove(struct pci_dev *pdev)
 {
 	struct dw_i2c_dev *dev = pci_get_drvdata(pdev);
 
-	i2c_dw_disable(dev);
+	dev->disable(dev);
 	pm_runtime_forbid(&pdev->dev);
 	pm_runtime_get_noresume(&pdev->dev);
 

diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index d1263b8..2ea6d0d 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c

@@ -1,5 +1,5 @@
 /*
- * Synopsys DesignWare I2C adapter driver (master only).
+ * Synopsys DesignWare I2C adapter driver.
  *
  * Based on the TI DAVINCI I2C adapter driver.
  *
@@ -21,27 +21,28 @@
  * ----------------------------------------------------------------------------
  *
  */
-#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/dmi.h>
-#include <linux/i2c.h>
-#include <linux/clk.h>
-#include <linux/clk-provider.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
 #include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/i2c.h>
 #include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of.h>
+#include <linux/platform_data/i2c-designware.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
 #include <linux/property.h>
-#include <linux/io.h>
 #include <linux/reset.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/acpi.h>
-#include <linux/platform_data/i2c-designware.h>
+
 #include "i2c-designware-core.h"
 
 static u32 i2c_dw_get_clk_rate_khz(struct dw_i2c_dev *dev)
@@ -171,6 +172,49 @@ static inline int dw_i2c_acpi_configure(struct platform_device *pdev)
 }
 #endif
 
+static void i2c_dw_configure_master(struct dw_i2c_dev *dev)
+{
+	dev->functionality = I2C_FUNC_10BIT_ADDR | DW_IC_DEFAULT_FUNCTIONALITY;
+
+	dev->master_cfg = DW_IC_CON_MASTER | DW_IC_CON_SLAVE_DISABLE |
+			  DW_IC_CON_RESTART_EN;
+
+	dev->mode = DW_IC_MASTER;
+
+	switch (dev->clk_freq) {
+	case 100000:
+		dev->master_cfg |= DW_IC_CON_SPEED_STD;
+		break;
+	case 3400000:
+		dev->master_cfg |= DW_IC_CON_SPEED_HIGH;
+		break;
+	default:
+		dev->master_cfg |= DW_IC_CON_SPEED_FAST;
+	}
+}
+
+static void i2c_dw_configure_slave(struct dw_i2c_dev *dev)
+{
+	dev->functionality = I2C_FUNC_SLAVE | DW_IC_DEFAULT_FUNCTIONALITY;
+
+	/* No speed bits here: the switch below ORs in exactly one encoding */
+	dev->slave_cfg = DW_IC_CON_RX_FIFO_FULL_HLD_CTRL |
+			 DW_IC_CON_RESTART_EN | DW_IC_CON_STOP_DET_IFADDRESSED;
+
+	dev->mode = DW_IC_SLAVE;
+
+	switch (dev->clk_freq) {
+	case 100000:
+		dev->slave_cfg |= DW_IC_CON_SPEED_STD;
+		break;
+	case 3400000:
+		dev->slave_cfg |= DW_IC_CON_SPEED_HIGH;
+		break;
+	default:
+		dev->slave_cfg |= DW_IC_CON_SPEED_FAST;
+	}
+}
+
 static int i2c_dw_plat_prepare_clk(struct dw_i2c_dev *i_dev, bool prepare)
 {
 	if (IS_ERR(i_dev->clk))
@@ -209,11 +253,11 @@ static void dw_i2c_set_fifo_size(struct dw_i2c_dev *dev, int id)
 static int dw_i2c_plat_probe(struct platform_device *pdev)
 {
 	struct dw_i2c_platform_data *pdata = dev_get_platdata(&pdev->dev);
-	struct dw_i2c_dev *dev;
 	struct i2c_adapter *adap;
-	struct resource *mem;
-	int irq, r;
+	struct dw_i2c_dev *dev;
 	u32 acpi_speed, ht = 0;
+	struct resource *mem;
+	int irq, ret;
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
@@ -276,29 +320,18 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 	    && dev->clk_freq != 1000000 && dev->clk_freq != 3400000) {
 		dev_err(&pdev->dev,
 			"Only 100kHz, 400kHz, 1MHz and 3.4MHz supported");
-		r = -EINVAL;
+		ret = -EINVAL;
 		goto exit_reset;
 	}
 
-	r = i2c_dw_probe_lock_support(dev);
-	if (r)
+	ret = i2c_dw_probe_lock_support(dev);
+	if (ret)
 		goto exit_reset;
 
-	dev->functionality = I2C_FUNC_10BIT_ADDR | DW_IC_DEFAULT_FUNCTIONALITY;
-
-	dev->master_cfg = DW_IC_CON_MASTER | DW_IC_CON_SLAVE_DISABLE |
-			  DW_IC_CON_RESTART_EN;
-
-	switch (dev->clk_freq) {
-	case 100000:
-		dev->master_cfg |= DW_IC_CON_SPEED_STD;
-		break;
-	case 3400000:
-		dev->master_cfg |= DW_IC_CON_SPEED_HIGH;
-		break;
-	default:
-		dev->master_cfg |= DW_IC_CON_SPEED_FAST;
-	}
+	if (i2c_detect_slave_mode(&pdev->dev))
+		i2c_dw_configure_slave(dev);
+	else
+		i2c_dw_configure_master(dev);
 
 	dev->clk = devm_clk_get(&pdev->dev, NULL);
 	if (!i2c_dw_plat_prepare_clk(dev, true)) {
@@ -327,11 +360,15 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 		pm_runtime_enable(&pdev->dev);
 	}
 
-	r = i2c_dw_probe(dev);
-	if (r)
+	if (dev->mode == DW_IC_SLAVE)
+		ret = i2c_dw_probe_slave(dev);
+	else
+		ret = i2c_dw_probe(dev);
+
+	if (ret)
 		goto exit_probe;
 
-	return r;
+	return ret;
 
 exit_probe:
 	if (!dev->pm_disabled)
@@ -339,7 +376,7 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 exit_reset:
 	if (!IS_ERR_OR_NULL(dev->rst))
 		reset_control_assert(dev->rst);
-	return r;
+	return ret;
 }
 
 static int dw_i2c_plat_remove(struct platform_device *pdev)
@@ -350,7 +387,7 @@ static int dw_i2c_plat_remove(struct platform_device *pdev)
 
 	i2c_del_adapter(&dev->adapter);
 
-	i2c_dw_disable(dev);
+	dev->disable(dev);
 
 	pm_runtime_dont_use_autosuspend(&pdev->dev);
 	pm_runtime_put_sync(&pdev->dev);
@@ -394,7 +431,7 @@ static int dw_i2c_plat_suspend(struct device *dev)
 	struct platform_device *pdev = to_platform_device(dev);
 	struct dw_i2c_dev *i_dev = platform_get_drvdata(pdev);
 
-	i2c_dw_disable(i_dev);
+	i_dev->disable(i_dev);
 	i2c_dw_plat_prepare_clk(i_dev, false);
 
 	return 0;
@@ -406,7 +443,7 @@ static int dw_i2c_plat_resume(struct device *dev)
 	struct dw_i2c_dev *i_dev = platform_get_drvdata(pdev);
 
 	i2c_dw_plat_prepare_clk(i_dev, true);
-	i2c_dw_init(i_dev);
+	i_dev->init(i_dev);
 
 	return 0;
 }
@@ -423,7 +460,7 @@ static const struct dev_pm_ops dw_i2c_dev_pm_ops = {
 #define DW_I2C_DEV_PMOPS NULL
 #endif
 
-/* work with hotplug and coldplug */
+/* Work with hotplug and coldplug */
 MODULE_ALIAS("platform:i2c_designware");
 
 static struct platform_driver dw_i2c_driver = {

diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c
new file mode 100644
index 0000000..0548c7e
--- /dev/null
+++ b/drivers/i2c/busses/i2c-designware-slave.c

@@ -0,0 +1,393 @@
+/*
+ * Synopsys DesignWare I2C adapter driver (slave only).
+ *
+ * Based on the Synopsys DesignWare I2C adapter driver (master).
+ *
+ * Copyright (C) 2016 Synopsys Inc.
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * ----------------------------------------------------------------------------
+ *
+ */
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
+
+#include "i2c-designware-core.h"
+
+/*
+ * Program the controller registers that select slave operation: both FIFO
+ * threshold registers are written with 0, DW_IC_CON receives dev->slave_cfg
+ * and DW_IC_INTR_MASK receives DW_IC_INTR_SLAVE_MASK.
+ */
+static void i2c_dw_configure_fifo_slave(struct dw_i2c_dev *dev)
+{
+	/* Configure Tx/Rx FIFO threshold levels. */
+	dw_writel(dev, 0, DW_IC_TX_TL);
+	dw_writel(dev, 0, DW_IC_RX_TL);
+
+	/* Configure the I2C slave. */
+	dw_writel(dev, dev->slave_cfg, DW_IC_CON);
+	dw_writel(dev, DW_IC_INTR_SLAVE_MASK, DW_IC_INTR_MASK);
+}
+
+/**
+ * i2c_dw_init_slave() - Initialize the designware i2c slave hardware
+ * @dev: device private data
+ *
+ * Detects the register access mode from DW_IC_COMP_TYPE, disables the
+ * adapter, programs the standard- and fast-mode SCL counts (plus the
+ * high-speed counts when the hardware supports them and they are provided),
+ * sets the SDA hold time and finally applies the slave FIFO, control and
+ * interrupt-mask configuration. This function does not re-enable the
+ * adapter; i2c_dw_reg_slave() enables it when a slave backend is registered.
+ *
+ * This function is called during I2C init function, and in case of timeout at
+ * run time.
+ *
+ * Return: 0 on success, the error from i2c_dw_acquire_lock() if the lock
+ * cannot be taken, or -ENODEV when DW_IC_COMP_TYPE holds an unknown value.
+ */
+static int i2c_dw_init_slave(struct dw_i2c_dev *dev)
+{
+	u32 sda_falling_time, scl_falling_time;
+	u32 reg, comp_param1;
+	u32 hcnt, lcnt;
+	int ret;
+
+	ret = i2c_dw_acquire_lock(dev);
+	if (ret)
+		return ret;
+
+	/* The component-type readback tells how registers must be accessed. */
+	reg = dw_readl(dev, DW_IC_COMP_TYPE);
+	if (reg == ___constant_swab32(DW_IC_COMP_TYPE_VALUE)) {
+		/* Configure register endianness access. */
+		dev->flags |= ACCESS_SWAP;
+	} else if (reg == (DW_IC_COMP_TYPE_VALUE & 0x0000ffff)) {
+		/* Configure register access mode 16bit. */
+		dev->flags |= ACCESS_16BIT;
+	} else if (reg != DW_IC_COMP_TYPE_VALUE) {
+		dev_err(dev->dev,
+			"Unknown Synopsys component type: 0x%08x\n", reg);
+		i2c_dw_release_lock(dev);
+		return -ENODEV;
+	}
+
+	comp_param1 = dw_readl(dev, DW_IC_COMP_PARAM_1);
+
+	/* Disable the adapter. */
+	__i2c_dw_enable_and_wait(dev, false);
+
+	/* Set standard and fast speed dividers for high/low periods. */
+	sda_falling_time = dev->sda_falling_time ?: 300; /* ns */
+	scl_falling_time = dev->scl_falling_time ?: 300; /* ns */
+
+	/* Set SCL timing parameters for standard-mode. */
+	if (dev->ss_hcnt && dev->ss_lcnt) {
+		/* Both counts were supplied: use them verbatim. */
+		hcnt = dev->ss_hcnt;
+		lcnt = dev->ss_lcnt;
+	} else {
+		hcnt = i2c_dw_scl_hcnt(i2c_dw_clk_rate(dev),
+				       4000,	/* tHD;STA = tHIGH = 4.0 us */
+				       sda_falling_time,
+				       0,	/* 0: DW default, 1: Ideal */
+				       0);	/* No offset */
+		lcnt = i2c_dw_scl_lcnt(i2c_dw_clk_rate(dev),
+				       4700,	/* tLOW = 4.7 us */
+				       scl_falling_time,
+				       0);	/* No offset */
+	}
+	dw_writel(dev, hcnt, DW_IC_SS_SCL_HCNT);
+	dw_writel(dev, lcnt, DW_IC_SS_SCL_LCNT);
+	dev_dbg(dev->dev, "Standard-mode HCNT:LCNT = %d:%d\n", hcnt, lcnt);
+
+	/* Set SCL timing parameters for fast-mode or fast-mode plus. */
+	if ((dev->clk_freq == 1000000) && dev->fp_hcnt && dev->fp_lcnt) {
+		hcnt = dev->fp_hcnt;
+		lcnt = dev->fp_lcnt;
+	} else if (dev->fs_hcnt && dev->fs_lcnt) {
+		hcnt = dev->fs_hcnt;
+		lcnt = dev->fs_lcnt;
+	} else {
+		hcnt = i2c_dw_scl_hcnt(i2c_dw_clk_rate(dev),
+				       600,	/* tHD;STA = tHIGH = 0.6 us */
+				       sda_falling_time,
+				       0,	/* 0: DW default, 1: Ideal */
+				       0);	/* No offset */
+		lcnt = i2c_dw_scl_lcnt(i2c_dw_clk_rate(dev),
+				       1300,	/* tLOW = 1.3 us */
+				       scl_falling_time,
+				       0);	/* No offset */
+	}
+	dw_writel(dev, hcnt, DW_IC_FS_SCL_HCNT);
+	dw_writel(dev, lcnt, DW_IC_FS_SCL_LCNT);
+	dev_dbg(dev->dev, "Fast-mode HCNT:LCNT = %d:%d\n", hcnt, lcnt);
+
+	/*
+	 * High speed was requested in slave_cfg: downgrade the request to
+	 * fast mode if COMP_PARAM_1 does not report high-speed support,
+	 * otherwise program the high-speed counts when both were supplied.
+	 */
+	if ((dev->slave_cfg & DW_IC_CON_SPEED_MASK) ==
+		DW_IC_CON_SPEED_HIGH) {
+		if ((comp_param1 & DW_IC_COMP_PARAM_1_SPEED_MODE_MASK)
+			!= DW_IC_COMP_PARAM_1_SPEED_MODE_HIGH) {
+			dev_err(dev->dev, "High Speed not supported!\n");
+			dev->slave_cfg &= ~DW_IC_CON_SPEED_MASK;
+			dev->slave_cfg |= DW_IC_CON_SPEED_FAST;
+		} else if (dev->hs_hcnt && dev->hs_lcnt) {
+			hcnt = dev->hs_hcnt;
+			lcnt = dev->hs_lcnt;
+			dw_writel(dev, hcnt, DW_IC_HS_SCL_HCNT);
+			dw_writel(dev, lcnt, DW_IC_HS_SCL_LCNT);
+			dev_dbg(dev->dev, "HighSpeed-mode HCNT:LCNT = %d:%d\n",
+				hcnt, lcnt);
+		}
+	}
+
+	/* Configure SDA Hold Time if required. */
+	reg = dw_readl(dev, DW_IC_COMP_VERSION);
+	if (reg >= DW_IC_SDA_HOLD_MIN_VERS) {
+		if (!dev->sda_hold_time) {
+			/* Keep previous hold time setting if no one set it. */
+			dev->sda_hold_time = dw_readl(dev, DW_IC_SDA_HOLD);
+		}
+		/*
+		 * Workaround for avoiding TX arbitration lost in case I2C
+		 * slave pulls SDA down "too quickly" after falling edge of
+		 * SCL by enabling non-zero SDA RX hold. Specification says it
+		 * extends incoming SDA low to high transition while SCL is
+		 * high but it appears to help also above issue.
+		 */
+		if (!(dev->sda_hold_time & DW_IC_SDA_HOLD_RX_MASK))
+			dev->sda_hold_time |= 1 << DW_IC_SDA_HOLD_RX_SHIFT;
+		dw_writel(dev, dev->sda_hold_time, DW_IC_SDA_HOLD);
+	} else {
+		dev_warn(dev->dev,
+			 "Hardware too old to adjust SDA hold time.\n");
+	}
+
+	/* Apply slave_cfg only now, after any high-speed downgrade above. */
+	i2c_dw_configure_fifo_slave(dev);
+	i2c_dw_release_lock(dev);
+
+	return 0;
+}
+
+/*
+ * i2c_algorithm.reg_slave hook: attach @slave to this controller.
+ *
+ * Only one slave may be registered at a time and 10-bit addresses are
+ * rejected. The adapter is disabled while IC_SAR is written and enabled
+ * again afterwards; the transfer bookkeeping in @dev is then reset.
+ *
+ * Returns 0 on success, -EBUSY if a slave is already registered, or
+ * -EAFNOSUPPORT if @slave has I2C_CLIENT_TEN set.
+ */
+static int i2c_dw_reg_slave(struct i2c_client *slave)
+{
+	struct dw_i2c_dev *dev = i2c_get_adapdata(slave->adapter);
+
+	if (dev->slave)
+		return -EBUSY;
+	if (slave->flags & I2C_CLIENT_TEN)
+		return -EAFNOSUPPORT;
+	/*
+	 * Set slave address in the IC_SAR register,
+	 * the address to which the DW_apb_i2c responds.
+	 */
+	__i2c_dw_enable(dev, false);
+	dw_writel(dev, slave->addr, DW_IC_SAR);
+	dev->slave = slave;
+
+	__i2c_dw_enable(dev, true);
+
+	/* Start from a clean transfer state for the new slave. */
+	dev->cmd_err = 0;
+	dev->msg_write_idx = 0;
+	dev->msg_read_idx = 0;
+	dev->msg_err = 0;
+	dev->status = STATUS_IDLE;
+	dev->abort_source = 0;
+	dev->rx_outstanding = 0;
+
+	return 0;
+}
+
+/*
+ * i2c_algorithm.unreg_slave hook: detach the registered slave.
+ *
+ * Calls the controller's disable_int and disable callbacks before clearing
+ * dev->slave, so a new slave can be registered later. Always returns 0.
+ */
+static int i2c_dw_unreg_slave(struct i2c_client *slave)
+{
+	struct dw_i2c_dev *dev = i2c_get_adapdata(slave->adapter);
+
+	dev->disable_int(dev);
+	dev->disable(dev);
+	dev->slave = NULL;
+
+	return 0;
+}
+
+/*
+ * Read IC_INTR_STAT and acknowledge each pending interrupt handled below by
+ * reading its dedicated IC_CLR_* register. RD_REQ and RX_FULL are not
+ * cleared here.
+ *
+ * Returns the IC_INTR_STAT value that was read before clearing.
+ */
+static u32 i2c_dw_read_clear_intrbits_slave(struct dw_i2c_dev *dev)
+{
+	u32 stat;
+
+	/*
+	 * The IC_INTR_STAT register just indicates "enabled" interrupts.
+	 * The unmasked raw version of interrupt status bits are available
+	 * in the IC_RAW_INTR_STAT register.
+	 *
+	 * That is,
+	 *   stat = dw_readl(IC_INTR_STAT);
+	 * equals to,
+	 *   stat = dw_readl(IC_RAW_INTR_STAT) & dw_readl(IC_INTR_MASK);
+	 *
+	 * The raw version might be useful for debugging purposes.
+	 */
+	stat = dw_readl(dev, DW_IC_INTR_STAT);
+
+	/*
+	 * Do not use the IC_CLR_INTR register to clear interrupts, or
+	 * you'll miss some interrupts, triggered during the period from
+	 * dw_readl(IC_INTR_STAT) to dw_readl(IC_CLR_INTR).
+	 *
+	 * Instead, use the separately-prepared IC_CLR_* registers.
+	 */
+	if (stat & DW_IC_INTR_TX_ABRT)
+		dw_readl(dev, DW_IC_CLR_TX_ABRT);
+	if (stat & DW_IC_INTR_RX_UNDER)
+		dw_readl(dev, DW_IC_CLR_RX_UNDER);
+	if (stat & DW_IC_INTR_RX_OVER)
+		dw_readl(dev, DW_IC_CLR_RX_OVER);
+	if (stat & DW_IC_INTR_TX_OVER)
+		dw_readl(dev, DW_IC_CLR_TX_OVER);
+	if (stat & DW_IC_INTR_RX_DONE)
+		dw_readl(dev, DW_IC_CLR_RX_DONE);
+	if (stat & DW_IC_INTR_ACTIVITY)
+		dw_readl(dev, DW_IC_CLR_ACTIVITY);
+	if (stat & DW_IC_INTR_STOP_DET)
+		dw_readl(dev, DW_IC_CLR_STOP_DET);
+	if (stat & DW_IC_INTR_START_DET)
+		dw_readl(dev, DW_IC_CLR_START_DET);
+	if (stat & DW_IC_INTR_GEN_CALL)
+		dw_readl(dev, DW_IC_CLR_GEN_CALL);
+
+	return stat;
+}
+
+/*
+ * Interrupt service routine. This gets called whenever an I2C slave interrupt
+ * occurs.
+ *
+ * Translates the controller's interrupt status into i2c_slave_event()
+ * notifications for dev->slave and moves data bytes through DW_IC_DATA_CMD.
+ *
+ * Returns 0 when the adapter is not enabled or no raw interrupt bit other
+ * than ACTIVITY is set (nothing to handle), and 1 otherwise.
+ */
+
+static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev)
+{
+	u32 raw_stat, stat, enabled;
+	u8 val, slave_activity;
+
+	stat = dw_readl(dev, DW_IC_INTR_STAT);
+	enabled = dw_readl(dev, DW_IC_ENABLE);
+	raw_stat = dw_readl(dev, DW_IC_RAW_INTR_STAT);
+	/* NOTE(review): the shift by 6 presumably matches the bit position of
+	 * DW_IC_STATUS_SLAVE_ACTIVITY - confirm against its definition. */
+	slave_activity = ((dw_readl(dev, DW_IC_STATUS) &
+		DW_IC_STATUS_SLAVE_ACTIVITY) >> 6);
+
+	if (!enabled || !(raw_stat & ~DW_IC_INTR_ACTIVITY))
+		return 0;
+
+	dev_dbg(dev->dev,
+		"%#x STATUS SLAVE_ACTIVITY=%#x : RAW_INTR_STAT=%#x : INTR_STAT=%#x\n",
+		enabled, slave_activity, raw_stat, stat);
+
+	if ((stat & DW_IC_INTR_RX_FULL) && (stat & DW_IC_INTR_STOP_DET))
+		i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_REQUESTED, &val);
+
+	if (stat & DW_IC_INTR_RD_REQ) {
+		if (slave_activity) {
+			if (stat & DW_IC_INTR_RX_FULL) {
+				/* Hand over the pending received byte first. */
+				val = dw_readl(dev, DW_IC_DATA_CMD);
+
+				if (!i2c_slave_event(dev->slave,
+						     I2C_SLAVE_WRITE_RECEIVED,
+						     &val)) {
+					dev_vdbg(dev->dev, "Byte %X acked!",
+						 val);
+				}
+				dw_readl(dev, DW_IC_CLR_RD_REQ);
+				stat = i2c_dw_read_clear_intrbits_slave(dev);
+			} else {
+				dw_readl(dev, DW_IC_CLR_RD_REQ);
+				dw_readl(dev, DW_IC_CLR_RX_UNDER);
+				stat = i2c_dw_read_clear_intrbits_slave(dev);
+			}
+			/* Ask the backend for the byte to transmit and queue it. */
+			if (!i2c_slave_event(dev->slave,
+					     I2C_SLAVE_READ_REQUESTED,
+					     &val))
+				dw_writel(dev, val, DW_IC_DATA_CMD);
+		}
+	}
+
+	if (stat & DW_IC_INTR_RX_DONE) {
+		if (!i2c_slave_event(dev->slave, I2C_SLAVE_READ_PROCESSED,
+				     &val))
+			dw_readl(dev, DW_IC_CLR_RX_DONE);
+
+		i2c_slave_event(dev->slave, I2C_SLAVE_STOP, &val);
+		stat = i2c_dw_read_clear_intrbits_slave(dev);
+		return 1;
+	}
+
+	if (stat & DW_IC_INTR_RX_FULL) {
+		val = dw_readl(dev, DW_IC_DATA_CMD);
+		if (!i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_RECEIVED,
+				     &val))
+			dev_vdbg(dev->dev, "Byte %X acked!", val);
+	} else {
+		/* No received byte pending: report STOP and clear status. */
+		i2c_slave_event(dev->slave, I2C_SLAVE_STOP, &val);
+		stat = i2c_dw_read_clear_intrbits_slave(dev);
+	}
+
+	return 1;
+}
+
+/*
+ * IRQ entry point installed by i2c_dw_probe_slave() with @dev_id set to the
+ * struct dw_i2c_dev. Clears the pending interrupt bits, runs the slave
+ * handler and completes dev->cmd_complete when the handler did some work.
+ *
+ * Returns IRQ_RETVAL() of the handler result, i.e. IRQ_NONE when the handler
+ * returned 0 (the line is requested as shared).
+ */
+static irqreturn_t i2c_dw_isr_slave(int this_irq, void *dev_id)
+{
+	struct dw_i2c_dev *dev = dev_id;
+	int ret;
+
+	i2c_dw_read_clear_intrbits_slave(dev);
+	ret = i2c_dw_irq_handler_slave(dev);
+	if (ret > 0)
+		complete(&dev->cmd_complete);
+
+	return IRQ_RETVAL(ret);
+}
+
+/*
+ * Algorithm for the slave-only adapter: it provides the functionality query
+ * and the slave (un)registration hooks and sets no transfer callback.
+ */
+static struct i2c_algorithm i2c_dw_algo = {
+	.functionality = i2c_dw_func,
+	.reg_slave = i2c_dw_reg_slave,
+	.unreg_slave = i2c_dw_unreg_slave,
+};
+
+/*
+ * Set up @dev as a slave-only DesignWare I2C adapter.
+ *
+ * Installs the slave init/disable callbacks, initializes the hardware through
+ * dev->init(), fills in the adapter, requests the (shared) interrupt with
+ * i2c_dw_isr_slave() as handler and registers the numbered adapter.
+ *
+ * Returns 0 on success or the negative error code from dev->init(),
+ * devm_request_irq() or i2c_add_numbered_adapter().
+ */
+int i2c_dw_probe_slave(struct dw_i2c_dev *dev)
+{
+	struct i2c_adapter *adap = &dev->adapter;
+	int ret;
+
+	init_completion(&dev->cmd_complete);
+
+	/* Callbacks used by the platform glue (remove/suspend/resume). */
+	dev->init = i2c_dw_init_slave;
+	dev->disable = i2c_dw_disable;
+	dev->disable_int = i2c_dw_disable_int;
+
+	ret = dev->init(dev);
+	if (ret)
+		return ret;
+
+	snprintf(adap->name, sizeof(adap->name),
+		 "Synopsys DesignWare I2C Slave adapter");
+	adap->retries = 3;
+	adap->algo = &i2c_dw_algo;
+	adap->dev.parent = dev->dev;
+	i2c_set_adapdata(adap, dev);
+
+	ret = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr_slave,
+			       IRQF_SHARED, dev_name(dev->dev), dev);
+	if (ret) {
+		dev_err(dev->dev, "failure requesting irq %i: %d\n",
+			dev->irq, ret);
+		return ret;
+	}
+
+	ret = i2c_add_numbered_adapter(adap);
+	if (ret)
+		dev_err(dev->dev, "failure adding adapter: %d\n", ret);
+	/*
+	 * NOTE(review): no matching pm_runtime get is taken in this function;
+	 * confirm the caller holds the usage count that is dropped here.
+	 */
+	pm_runtime_put_noidle(dev->dev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i2c_dw_probe_slave);
+
+MODULE_AUTHOR("Luis Oliveira <lolivei@synopsys.com>");
+MODULE_DESCRIPTION("Synopsys DesignWare I2C bus slave adapter");
+MODULE_LICENSE("GPL v2");

diff --git a/drivers/i2c/busses/i2c-emev2.c b/drivers/i2c/busses/i2c-emev2.c
index 3129127..d2e8448 100644
--- a/drivers/i2c/busses/i2c-emev2.c
+++ b/drivers/i2c/busses/i2c-emev2.c

@@ -375,7 +375,9 @@ static int em_i2c_probe(struct platform_device *pdev)
 	if (IS_ERR(priv->sclk))
 		return PTR_ERR(priv->sclk);
 
-	clk_prepare_enable(priv->sclk);
+	ret = clk_prepare_enable(priv->sclk);
+	if (ret)
+		return ret;
 
 	priv->adap.timeout = msecs_to_jiffies(100);
 	priv->adap.retries = 5;

diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 6484fa6..c9536e1 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c

@@ -66,6 +66,8 @@
  * Lewisburg Supersku (PCH)	0xa223	32	hard	yes	yes	yes
  * Kaby Lake PCH-H (PCH)	0xa2a3	32	hard	yes	yes	yes
  * Gemini Lake (SOC)		0x31d4	32	hard	yes	yes	yes
+ * Cannon Lake-H (PCH)		0xa323	32	hard	yes	yes	yes
+ * Cannon Lake-LP (PCH)		0x9da3	32	hard	yes	yes	yes
  *
  * Features supported by this driver:
  * Software PEC				no
@@ -226,10 +228,12 @@
 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_SMBUS		0x9c22
 #define PCI_DEVICE_ID_INTEL_WILDCATPOINT_LP_SMBUS	0x9ca2
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS	0x9d23
+#define PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS		0x9da3
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS	0xa123
 #define PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS		0xa1a3
 #define PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS	0xa223
 #define PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS	0xa2a3
+#define PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS		0xa323
 
 struct i801_mux_config {
 	char *gpio_chip;
@@ -1026,6 +1030,8 @@ static const struct pci_device_id i801_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS) },
 	{ 0, }
 };
 
@@ -1499,6 +1505,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	switch (dev->device) {
 	case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS:
 	case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS:
+	case PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS:
+	case PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS:
 	case PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS:
 	case PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS:
 	case PCI_DEVICE_ID_INTEL_DNV_SMBUS:

diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c
index 5738556..d4e8f19 100644
--- a/drivers/i2c/busses/i2c-mxs.c
+++ b/drivers/i2c/busses/i2c-mxs.c

@@ -419,7 +419,7 @@ static int mxs_i2c_pio_setup_xfer(struct i2c_adapter *adap,
 
 		ret = mxs_i2c_pio_wait_xfer_end(i2c);
 		if (ret) {
-			dev_err(i2c->dev,
+			dev_dbg(i2c->dev,
 				"PIO: Failed to send SELECT command!\n");
 			goto cleanup;
 		}
@@ -431,7 +431,7 @@ static int mxs_i2c_pio_setup_xfer(struct i2c_adapter *adap,
 
 		ret = mxs_i2c_pio_wait_xfer_end(i2c);
 		if (ret) {
-			dev_err(i2c->dev,
+			dev_dbg(i2c->dev,
 				"PIO: Failed to send READ command!\n");
 			goto cleanup;
 		}
@@ -528,7 +528,7 @@ static int mxs_i2c_pio_setup_xfer(struct i2c_adapter *adap,
 			/* Wait for the end of the transfer. */
 			ret = mxs_i2c_pio_wait_xfer_end(i2c);
 			if (ret) {
-				dev_err(i2c->dev,
+				dev_dbg(i2c->dev,
 					"PIO: Failed to finish WRITE cmd!\n");
 				break;
 			}

diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c
index 3bd2e7d..853a2ab 100644
--- a/drivers/i2c/busses/i2c-pca-platform.c
+++ b/drivers/i2c/busses/i2c-pca-platform.c

@@ -22,14 +22,17 @@
 #include <linux/i2c-algo-pca.h>
 #include <linux/i2c-pca-platform.h>
 #include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 
 #include <asm/irq.h>
 
 struct i2c_pca_pf_data {
 	void __iomem			*reg_base;
 	int				irq;	/* if 0, use polling */
-	int				gpio;
+	struct gpio_desc		*gpio;
 	wait_queue_head_t		wait;
 	struct i2c_adapter		adap;
 	struct i2c_algo_pca_data	algo_data;
@@ -104,17 +107,17 @@ static int i2c_pca_pf_waitforcompletion(void *pd)
 static void i2c_pca_pf_dummyreset(void *pd)
 {
 	struct i2c_pca_pf_data *i2c = pd;
-	printk(KERN_WARNING "%s: No reset-pin found. Chip may get stuck!\n",
-		i2c->adap.name);
+
+	dev_warn(&i2c->adap.dev, "No reset-pin found. Chip may get stuck!\n");
 }
 
 static void i2c_pca_pf_resetchip(void *pd)
 {
 	struct i2c_pca_pf_data *i2c = pd;
 
-	gpio_set_value(i2c->gpio, 0);
+	gpiod_set_value(i2c->gpio, 1);
 	ndelay(100);
-	gpio_set_value(i2c->gpio, 1);
+	gpiod_set_value(i2c->gpio, 0);
 }
 
 static irqreturn_t i2c_pca_pf_handler(int this_irq, void *dev_id)
@@ -136,36 +139,27 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 	struct resource *res;
 	struct i2c_pca9564_pf_platform_data *platform_data =
 				dev_get_platdata(&pdev->dev);
+	struct device_node *np = pdev->dev.of_node;
 	int ret = 0;
 	int irq;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq = platform_get_irq(pdev, 0);
 	/* If irq is 0, we do polling. */
+	if (irq < 0)
+		irq = 0;
 
-	if (res == NULL) {
-		ret = -ENODEV;
-		goto e_print;
-	}
+	i2c = devm_kzalloc(&pdev->dev, sizeof(*i2c), GFP_KERNEL);
+	if (!i2c)
+		return -ENOMEM;
 
-	if (!request_mem_region(res->start, resource_size(res), res->name)) {
-		ret = -ENOMEM;
-		goto e_print;
-	}
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	i2c->reg_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(i2c->reg_base))
+		return PTR_ERR(i2c->reg_base);
 
-	i2c = kzalloc(sizeof(struct i2c_pca_pf_data), GFP_KERNEL);
-	if (!i2c) {
-		ret = -ENOMEM;
-		goto e_alloc;
-	}
 
 	init_waitqueue_head(&i2c->wait);
 
-	i2c->reg_base = ioremap(res->start, resource_size(res));
-	if (!i2c->reg_base) {
-		ret = -ENOMEM;
-		goto e_remap;
-	}
 	i2c->io_base = res->start;
 	i2c->io_size = resource_size(res);
 	i2c->irq = irq;
@@ -177,20 +171,43 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 		 (unsigned long) res->start);
 	i2c->adap.algo_data = &i2c->algo_data;
 	i2c->adap.dev.parent = &pdev->dev;
+	i2c->adap.dev.of_node = np;
 
 	if (platform_data) {
 		i2c->adap.timeout = platform_data->timeout;
 		i2c->algo_data.i2c_clock = platform_data->i2c_clock_speed;
-		i2c->gpio = platform_data->gpio;
+		if (gpio_is_valid(platform_data->gpio)) {
+			ret = devm_gpio_request_one(&pdev->dev,
+						    platform_data->gpio,
+						    GPIOF_ACTIVE_LOW,
+						    i2c->adap.name);
+			if (ret == 0) {
+				i2c->gpio = gpio_to_desc(platform_data->gpio);
+				gpiod_direction_output(i2c->gpio, 0);
+			} else {
+				dev_warn(&pdev->dev, "Registering gpio failed!\n");
+				i2c->gpio = NULL;
+			}
+		}
+	} else if (np) {
+		i2c->adap.timeout = HZ;
+		/*
+		 * Pass the bare function name: gpiolib appends the "-gpios"
+		 * suffix itself, so "reset" is what matches a "reset-gpios"
+		 * property. Asking for "reset-gpios" never finds the line and
+		 * the driver silently falls back to the dummy reset.
+		 */
+		i2c->gpio = devm_gpiod_get_optional(&pdev->dev, "reset",
+						    GPIOD_OUT_LOW);
+		if (IS_ERR(i2c->gpio))
+			return PTR_ERR(i2c->gpio);
+		of_property_read_u32_index(np, "clock-frequency", 0,
+					   &i2c->algo_data.i2c_clock);
 	} else {
 		i2c->adap.timeout = HZ;
 		i2c->algo_data.i2c_clock = 59000;
-		i2c->gpio = -1;
+		i2c->gpio = NULL;
 	}
 
 	i2c->algo_data.data = i2c;
 	i2c->algo_data.wait_for_completion = i2c_pca_pf_waitforcompletion;
-	i2c->algo_data.reset_chip = i2c_pca_pf_dummyreset;
+	if (i2c->gpio)
+		i2c->algo_data.reset_chip = i2c_pca_pf_resetchip;
+	else
+		i2c->algo_data.reset_chip = i2c_pca_pf_dummyreset;
 
 	switch (res->flags & IORESOURCE_MEM_TYPE_MASK) {
 	case IORESOURCE_MEM_32BIT:
@@ -208,52 +225,22 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 		break;
 	}
 
-	/* Use gpio_is_valid() when in mainline */
-	if (i2c->gpio > -1) {
-		ret = gpio_request(i2c->gpio, i2c->adap.name);
-		if (ret == 0) {
-			gpio_direction_output(i2c->gpio, 1);
-			i2c->algo_data.reset_chip = i2c_pca_pf_resetchip;
-		} else {
-			printk(KERN_WARNING "%s: Registering gpio failed!\n",
-				i2c->adap.name);
-			i2c->gpio = ret;
-		}
-	}
-
 	if (irq) {
-		ret = request_irq(irq, i2c_pca_pf_handler,
+		ret = devm_request_irq(&pdev->dev, irq, i2c_pca_pf_handler,
 			IRQF_TRIGGER_FALLING, pdev->name, i2c);
 		if (ret)
-			goto e_reqirq;
+			return ret;
 	}
 
-	if (i2c_pca_add_numbered_bus(&i2c->adap) < 0) {
-		ret = -ENODEV;
-		goto e_adapt;
-	}
+	ret = i2c_pca_add_numbered_bus(&i2c->adap);
+	if (ret)
+		return ret;
 
 	platform_set_drvdata(pdev, i2c);
 
-	printk(KERN_INFO "%s registered.\n", i2c->adap.name);
+	dev_info(&pdev->dev, "registered.\n");
 
 	return 0;
-
-e_adapt:
-	if (irq)
-		free_irq(irq, i2c);
-e_reqirq:
-	if (i2c->gpio > -1)
-		gpio_free(i2c->gpio);
-
-	iounmap(i2c->reg_base);
-e_remap:
-	kfree(i2c);
-e_alloc:
-	release_mem_region(res->start, resource_size(res));
-e_print:
-	printk(KERN_ERR "Registering PCA9564/PCA9665 FAILED! (%d)\n", ret);
-	return ret;
 }
 
 static int i2c_pca_pf_remove(struct platform_device *pdev)
@@ -262,24 +249,24 @@ static int i2c_pca_pf_remove(struct platform_device *pdev)
 
 	i2c_del_adapter(&i2c->adap);
 
-	if (i2c->irq)
-		free_irq(i2c->irq, i2c);
-
-	if (i2c->gpio > -1)
-		gpio_free(i2c->gpio);
-
-	iounmap(i2c->reg_base);
-	release_mem_region(i2c->io_base, i2c->io_size);
-	kfree(i2c);
-
 	return 0;
 }
 
+#ifdef CONFIG_OF
+/*
+ * Device-tree compatible strings matched by this platform driver; referenced
+ * from the driver's of_match_table and exported for module autoloading.
+ */
+static const struct of_device_id i2c_pca_of_match_table[] = {
+	{ .compatible = "nxp,pca9564" },
+	{ .compatible = "nxp,pca9665" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, i2c_pca_of_match_table);
+#endif
+
 static struct platform_driver i2c_pca_pf_driver = {
 	.probe = i2c_pca_pf_probe,
 	.remove = i2c_pca_pf_remove,
 	.driver = {
 		.name = "i2c-pca-platform",
+		.of_match_table = of_match_ptr(i2c_pca_of_match_table),
 	},
 };
 

diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index 8be3e6c..93c1a54 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c

@@ -1,5 +1,5 @@
 /*
- * Driver for the Renesas RCar I2C unit
+ * Driver for the Renesas R-Car I2C unit
  *
  * Copyright (C) 2014-15 Wolfram Sang <wsa@sang-engineering.com>
  * Copyright (C) 2011-2015 Renesas Electronics Corporation
@@ -783,7 +783,12 @@ static int rcar_unreg_slave(struct i2c_client *slave)
 
 static u32 rcar_i2c_func(struct i2c_adapter *adap)
 {
-	/* This HW can't do SMBUS_QUICK and NOSTART */
+	/*
+	 * This HW can't do:
+	 * I2C_SMBUS_QUICK (setting FSB during START didn't work)
+	 * I2C_M_NOSTART (automatically sends address after START)
+	 * I2C_M_IGNORE_NAK (automatically sends STOP after NAK)
+	 */
 	return I2C_FUNC_I2C | I2C_FUNC_SLAVE |
 		(I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK);
 }

diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index 3d75593..2e097d9 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c

@@ -24,7 +24,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/i2c.h>
-#include <linux/i2c/i2c-sh_mobile.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -879,10 +878,10 @@ static int sh_mobile_i2c_hook_irqs(struct platform_device *dev, struct sh_mobile
 
 static int sh_mobile_i2c_probe(struct platform_device *dev)
 {
-	struct i2c_sh_mobile_platform_data *pdata = dev_get_platdata(&dev->dev);
 	struct sh_mobile_i2c_data *pd;
 	struct i2c_adapter *adap;
 	struct resource *res;
+	const struct of_device_id *match;
 	int ret;
 	u32 bus_speed;
 
@@ -910,30 +909,18 @@ static int sh_mobile_i2c_probe(struct platform_device *dev)
 	if (IS_ERR(pd->reg))
 		return PTR_ERR(pd->reg);
 
-	/* Use platform data bus speed or STANDARD_MODE */
 	ret = of_property_read_u32(dev->dev.of_node, "clock-frequency", &bus_speed);
 	pd->bus_speed = ret ? STANDARD_MODE : bus_speed;
-
 	pd->clks_per_count = 1;
 
-	if (dev->dev.of_node) {
-		const struct of_device_id *match;
+	match = of_match_device(sh_mobile_i2c_dt_ids, &dev->dev);
+	if (match) {
+		const struct sh_mobile_dt_config *config = match->data;
 
-		match = of_match_device(sh_mobile_i2c_dt_ids, &dev->dev);
-		if (match) {
-			const struct sh_mobile_dt_config *config;
+		pd->clks_per_count = config->clks_per_count;
 
-			config = match->data;
-			pd->clks_per_count = config->clks_per_count;
-
-			if (config->setup)
-				config->setup(pd);
-		}
-	} else {
-		if (pdata && pdata->bus_speed)
-			pd->bus_speed = pdata->bus_speed;
-		if (pdata && pdata->clks_per_count)
-			pd->clks_per_count = pdata->clks_per_count;
+		if (config->setup)
+			config->setup(pd);
 	}
 
 	/* The IIC blocks on SH-Mobile ARM processors

diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c
index 6ba6c83..7e89ba6 100644
--- a/drivers/i2c/busses/i2c-xgene-slimpro.c
+++ b/drivers/i2c/busses/i2c-xgene-slimpro.c

@@ -22,10 +22,12 @@
  * using the APM X-Gene SLIMpro mailbox driver.
  *
  */
+#include <acpi/pcc.h>
 #include <linux/acpi.h>
 #include <linux/dma-mapping.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
+#include <linux/io.h>
 #include <linux/mailbox_client.h>
 #include <linux/module.h>
 #include <linux/of.h>
@@ -89,6 +91,8 @@
 	((addrlen << SLIMPRO_IIC_ADDRLEN_SHIFT) & SLIMPRO_IIC_ADDRLEN_MASK) | \
 	((datalen << SLIMPRO_IIC_DATALEN_SHIFT) & SLIMPRO_IIC_DATALEN_MASK))
 
+#define SLIMPRO_MSG_TYPE(v)             (((v) & 0xF0000000) >> 28)
+
 /*
  * Encode for upper address for block data
  */
@@ -99,19 +103,47 @@
 								& 0x3FF00000))
 #define SLIMPRO_IIC_ENCODE_ADDR(a)			((a) & 0x000FFFFF)
 
+#define SLIMPRO_IIC_MSG_DWORD_COUNT			3
+
+/* PCC related defines */
+#define PCC_SIGNATURE			0x50424300
+#define PCC_STS_CMD_COMPLETE		BIT(0)
+#define PCC_STS_SCI_DOORBELL		BIT(1)
+#define PCC_STS_ERR			BIT(2)
+#define PCC_STS_PLAT_NOTIFY		BIT(3)
+#define PCC_CMD_GENERATE_DB_INT		BIT(15)
+
 struct slimpro_i2c_dev {
 	struct i2c_adapter adapter;
 	struct device *dev;
 	struct mbox_chan *mbox_chan;
 	struct mbox_client mbox_client;
+	int mbox_idx;
 	struct completion rd_complete;
 	u8 dma_buffer[I2C_SMBUS_BLOCK_MAX + 1]; /* dma_buffer[0] is used for length */
 	u32 *resp_msg;
+	phys_addr_t comm_base_addr;
+	void *pcc_comm_addr;
 };
 
 #define to_slimpro_i2c_dev(cl)	\
 		container_of(cl, struct slimpro_i2c_dev, mbox_client)
 
+/*
+ * Test and clear the bits of @mask in the little-endian 16-bit word at @addr.
+ *
+ * Returns the old value of the masked bits (non-zero if any was set). The
+ * read and the write-back are two separate accesses (READ_ONCE/WRITE_ONCE),
+ * not a single atomic read-modify-write.
+ */
+static u16 xgene_word_tst_and_clr(u16 *addr, u16 mask)
+{
+	u16 ret, val;
+
+	val = le16_to_cpu(READ_ONCE(*addr));
+	ret = val & mask;
+	val &= ~mask;
+	WRITE_ONCE(*addr, cpu_to_le16(val));
+
+	return ret;
+}
+
 static void slimpro_i2c_rx_cb(struct mbox_client *cl, void *mssg)
 {
 	struct slimpro_i2c_dev *ctx = to_slimpro_i2c_dev(cl);
@@ -129,9 +161,53 @@ static void slimpro_i2c_rx_cb(struct mbox_client *cl, void *mssg)
 		complete(&ctx->rd_complete);
 }
 
+/*
+ * Mailbox rx callback for the PCC (ACPI) path.
+ *
+ * Acts only when the shared-memory status word has the SCI doorbell bit set;
+ * both that bit and the command-complete bit are cleared as they are tested.
+ * On command completion the second dword of the payload that follows the
+ * shared-memory header is stored through ctx->resp_msg (if set) and the
+ * waiter on ctx->rd_complete is woken. The incoming @msg value is not used.
+ */
+static void slimpro_i2c_pcc_rx_cb(struct mbox_client *cl, void *msg)
+{
+	struct slimpro_i2c_dev *ctx = to_slimpro_i2c_dev(cl);
+	struct acpi_pcct_shared_memory *generic_comm_base = ctx->pcc_comm_addr;
+
+	/* Check if platform sends interrupt */
+	if (!xgene_word_tst_and_clr(&generic_comm_base->status,
+				    PCC_STS_SCI_DOORBELL))
+		return;
+
+	if (xgene_word_tst_and_clr(&generic_comm_base->status,
+				   PCC_STS_CMD_COMPLETE)) {
+		/* The message payload starts right after the header. */
+		msg = generic_comm_base + 1;
+
+		/* Response message msg[1] contains the return value. */
+		if (ctx->resp_msg)
+			*ctx->resp_msg = ((u32 *)msg)[1];
+
+		complete(&ctx->rd_complete);
+	}
+}
+
+/*
+ * Fill the PCC shared communication region for an outgoing request.
+ *
+ * Writes the signature (PCC_SIGNATURE ORed with the channel index), the
+ * command word (message type taken from the top four bits of msg[0], with
+ * the doorbell-interrupt request bit set), clears the command-complete status
+ * bit and copies the SLIMPRO_IIC_MSG_DWORD_COUNT dwords of @msg into the
+ * payload area that follows the header. All fields are stored little-endian.
+ */
+static void slimpro_i2c_pcc_tx_prepare(struct slimpro_i2c_dev *ctx, u32 *msg)
+{
+	struct acpi_pcct_shared_memory *generic_comm_base = ctx->pcc_comm_addr;
+	u32 *ptr = (void *)(generic_comm_base + 1);
+	u16 status;
+	int i;
+
+	WRITE_ONCE(generic_comm_base->signature,
+		   cpu_to_le32(PCC_SIGNATURE | ctx->mbox_idx));
+
+	WRITE_ONCE(generic_comm_base->command,
+		   cpu_to_le16(SLIMPRO_MSG_TYPE(msg[0]) | PCC_CMD_GENERATE_DB_INT));
+
+	status = le16_to_cpu(READ_ONCE(generic_comm_base->status));
+	status &= ~PCC_STS_CMD_COMPLETE;
+	WRITE_ONCE(generic_comm_base->status, cpu_to_le16(status));
+
+	/* Copy the message to the PCC comm space */
+	for (i = 0; i < SLIMPRO_IIC_MSG_DWORD_COUNT; i++)
+		WRITE_ONCE(ptr[i], cpu_to_le32(msg[i]));
+}
+
 static int start_i2c_msg_xfer(struct slimpro_i2c_dev *ctx)
 {
-	if (ctx->mbox_client.tx_block) {
+	if (ctx->mbox_client.tx_block || !acpi_disabled) {
 		if (!wait_for_completion_timeout(&ctx->rd_complete,
 						 msecs_to_jiffies(MAILBOX_OP_TIMEOUT)))
 			return -ETIMEDOUT;
@@ -144,26 +220,46 @@ static int start_i2c_msg_xfer(struct slimpro_i2c_dev *ctx)
 	return 0;
 }
 
+/*
+ * Send one request over the mailbox and wait for it to finish.
+ *
+ * @ctx:  driver context
+ * @msg:  request to send (the PCC path copies SLIMPRO_IIC_MSG_DWORD_COUNT
+ *        dwords from it)
+ * @data: where the rx callback stores the response word; published through
+ *        ctx->resp_msg for the duration of the call
+ *
+ * When ACPI is enabled the completion is re-armed and the request is copied
+ * into the PCC shared region before sending, and mbox_chan_txdone() is called
+ * afterwards whether or not the send succeeded.
+ *
+ * Returns the negative error from mbox_send_message(), otherwise the result
+ * of start_i2c_msg_xfer().
+ */
+static int slimpro_i2c_send_msg(struct slimpro_i2c_dev *ctx,
+				u32 *msg,
+				u32 *data)
+{
+	int rc;
+
+	ctx->resp_msg = data;
+
+	if (!acpi_disabled) {
+		reinit_completion(&ctx->rd_complete);
+		slimpro_i2c_pcc_tx_prepare(ctx, msg);
+	}
+
+	rc = mbox_send_message(ctx->mbox_chan, msg);
+	if (rc < 0)
+		goto err;
+
+	rc = start_i2c_msg_xfer(ctx);
+
+	/* Common exit: reached on both the success and the failure path. */
+err:
+	if (!acpi_disabled)
+		mbox_chan_txdone(ctx->mbox_chan, 0);
+
+	ctx->resp_msg = NULL;
+
+	return rc;
+}
+
 static int slimpro_i2c_rd(struct slimpro_i2c_dev *ctx, u32 chip,
 			  u32 addr, u32 addrlen, u32 protocol,
 			  u32 readlen, u32 *data)
 {
 	u32 msg[3];
-	int rc;
 
 	msg[0] = SLIMPRO_IIC_ENCODE_MSG(SLIMPRO_IIC_BUS, chip,
 					SLIMPRO_IIC_READ, protocol, addrlen, readlen);
 	msg[1] = SLIMPRO_IIC_ENCODE_ADDR(addr);
 	msg[2] = 0;
-	ctx->resp_msg = data;
-	rc = mbox_send_message(ctx->mbox_chan, &msg);
-	if (rc < 0)
-		goto err;
 
-	rc = start_i2c_msg_xfer(ctx);
-err:
-	ctx->resp_msg = NULL;
-	return rc;
+	return slimpro_i2c_send_msg(ctx, msg, data);
 }
 
 static int slimpro_i2c_wr(struct slimpro_i2c_dev *ctx, u32 chip,
@@ -171,22 +267,13 @@ static int slimpro_i2c_wr(struct slimpro_i2c_dev *ctx, u32 chip,
 			  u32 data)
 {
 	u32 msg[3];
-	int rc;
 
 	msg[0] = SLIMPRO_IIC_ENCODE_MSG(SLIMPRO_IIC_BUS, chip,
 					SLIMPRO_IIC_WRITE, protocol, addrlen, writelen);
 	msg[1] = SLIMPRO_IIC_ENCODE_ADDR(addr);
 	msg[2] = data;
-	ctx->resp_msg = msg;
 
-	rc = mbox_send_message(ctx->mbox_chan, &msg);
-	if (rc < 0)
-		goto err;
-
-	rc = start_i2c_msg_xfer(ctx);
-err:
-	ctx->resp_msg = NULL;
-	return rc;
+	return slimpro_i2c_send_msg(ctx, msg, msg);
 }
 
 static int slimpro_i2c_blkrd(struct slimpro_i2c_dev *ctx, u32 chip, u32 addr,
@@ -201,8 +288,7 @@ static int slimpro_i2c_blkrd(struct slimpro_i2c_dev *ctx, u32 chip, u32 addr,
 	if (dma_mapping_error(ctx->dev, paddr)) {
 		dev_err(&ctx->adapter.dev, "Error in mapping dma buffer %p\n",
 			ctx->dma_buffer);
-		rc = -ENOMEM;
-		goto err;
+		return -ENOMEM;
 	}
 
 	msg[0] = SLIMPRO_IIC_ENCODE_MSG(SLIMPRO_IIC_BUS, chip, SLIMPRO_IIC_READ,
@@ -212,21 +298,13 @@ static int slimpro_i2c_blkrd(struct slimpro_i2c_dev *ctx, u32 chip, u32 addr,
 		 SLIMPRO_IIC_ENCODE_UPPER_BUFADDR(paddr) |
 		 SLIMPRO_IIC_ENCODE_ADDR(addr);
 	msg[2] = (u32)paddr;
-	ctx->resp_msg = msg;
 
-	rc = mbox_send_message(ctx->mbox_chan, &msg);
-	if (rc < 0)
-		goto err_unmap;
-
-	rc = start_i2c_msg_xfer(ctx);
+	rc = slimpro_i2c_send_msg(ctx, msg, msg);
 
 	/* Copy to destination */
 	memcpy(data, ctx->dma_buffer, readlen);
 
-err_unmap:
 	dma_unmap_single(ctx->dev, paddr, readlen, DMA_FROM_DEVICE);
-err:
-	ctx->resp_msg = NULL;
 	return rc;
 }
 
@@ -244,8 +322,7 @@ static int slimpro_i2c_blkwr(struct slimpro_i2c_dev *ctx, u32 chip,
 	if (dma_mapping_error(ctx->dev, paddr)) {
 		dev_err(&ctx->adapter.dev, "Error in mapping dma buffer %p\n",
 			ctx->dma_buffer);
-		rc = -ENOMEM;
-		goto err;
+		return -ENOMEM;
 	}
 
 	msg[0] = SLIMPRO_IIC_ENCODE_MSG(SLIMPRO_IIC_BUS, chip, SLIMPRO_IIC_WRITE,
@@ -254,21 +331,13 @@ static int slimpro_i2c_blkwr(struct slimpro_i2c_dev *ctx, u32 chip,
 		 SLIMPRO_IIC_ENCODE_UPPER_BUFADDR(paddr) |
 		 SLIMPRO_IIC_ENCODE_ADDR(addr);
 	msg[2] = (u32)paddr;
-	ctx->resp_msg = msg;
 
 	if (ctx->mbox_client.tx_block)
 		reinit_completion(&ctx->rd_complete);
 
-	rc = mbox_send_message(ctx->mbox_chan, &msg);
-	if (rc < 0)
-		goto err_unmap;
+	rc = slimpro_i2c_send_msg(ctx, msg, msg);
 
-	rc = start_i2c_msg_xfer(ctx);
-
-err_unmap:
 	dma_unmap_single(ctx->dev, paddr, writelen, DMA_TO_DEVICE);
-err:
-	ctx->resp_msg = NULL;
 	return rc;
 }
 
@@ -394,17 +463,73 @@ static int xgene_slimpro_i2c_probe(struct platform_device *pdev)
 
 	/* Request mailbox channel */
 	cl->dev = &pdev->dev;
-	cl->rx_callback = slimpro_i2c_rx_cb;
-	cl->tx_block = true;
 	init_completion(&ctx->rd_complete);
 	cl->tx_tout = MAILBOX_OP_TIMEOUT;
 	cl->knows_txdone = false;
-	ctx->mbox_chan = mbox_request_channel(cl, MAILBOX_I2C_INDEX);
-	if (IS_ERR(ctx->mbox_chan)) {
-		dev_err(&pdev->dev, "i2c mailbox channel request failed\n");
-		return PTR_ERR(ctx->mbox_chan);
-	}
+	if (acpi_disabled) {
+		cl->tx_block = true;
+		cl->rx_callback = slimpro_i2c_rx_cb;
+		ctx->mbox_chan = mbox_request_channel(cl, MAILBOX_I2C_INDEX);
+		if (IS_ERR(ctx->mbox_chan)) {
+			dev_err(&pdev->dev, "i2c mailbox channel request failed\n");
+			return PTR_ERR(ctx->mbox_chan);
+		}
+	} else {
+		struct acpi_pcct_hw_reduced *cppc_ss;
 
+		if (device_property_read_u32(&pdev->dev, "pcc-channel",
+					     &ctx->mbox_idx))
+			ctx->mbox_idx = MAILBOX_I2C_INDEX;
+
+		cl->tx_block = false;
+		cl->rx_callback = slimpro_i2c_pcc_rx_cb;
+		ctx->mbox_chan = pcc_mbox_request_channel(cl, ctx->mbox_idx);
+		if (IS_ERR(ctx->mbox_chan)) {
+			dev_err(&pdev->dev, "PCC mailbox channel request failed\n");
+			return PTR_ERR(ctx->mbox_chan);
+		}
+
+		/*
+		 * The PCC mailbox controller driver should
+		 * have parsed the PCCT (global table of all
+		 * PCC channels) and stored pointers to the
+		 * subspace communication region in con_priv.
+		 */
+		cppc_ss = ctx->mbox_chan->con_priv;
+		if (!cppc_ss) {
+			dev_err(&pdev->dev, "PPC subspace not found\n");
+			rc = -ENOENT;
+			goto mbox_err;
+		}
+
+		if (!ctx->mbox_chan->mbox->txdone_irq) {
+			dev_err(&pdev->dev, "PCC IRQ not supported\n");
+			rc = -ENOENT;
+			goto mbox_err;
+		}
+
+		/*
+		 * This is the shared communication region
+		 * for the OS and Platform to communicate over.
+		 */
+		ctx->comm_base_addr = cppc_ss->base_address;
+		if (ctx->comm_base_addr) {
+			ctx->pcc_comm_addr = memremap(ctx->comm_base_addr,
+						      cppc_ss->length,
+						      MEMREMAP_WB);
+		} else {
+			dev_err(&pdev->dev, "Failed to get PCC comm region\n");
+			rc = -ENOENT;
+			goto mbox_err;
+		}
+
+		if (!ctx->pcc_comm_addr) {
+			dev_err(&pdev->dev,
+				"Failed to ioremap PCC comm region\n");
+			rc = -ENOMEM;
+			goto mbox_err;
+		}
+	}
 	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
 	if (rc)
 		dev_warn(&pdev->dev, "Unable to set dma mask\n");
@@ -419,13 +544,19 @@ static int xgene_slimpro_i2c_probe(struct platform_device *pdev)
 	ACPI_COMPANION_SET(&adapter->dev, ACPI_COMPANION(&pdev->dev));
 	i2c_set_adapdata(adapter, ctx);
 	rc = i2c_add_adapter(adapter);
-	if (rc) {
-		mbox_free_channel(ctx->mbox_chan);
-		return rc;
-	}
+	if (rc)
+		goto mbox_err;
 
 	dev_info(&pdev->dev, "Mailbox I2C Adapter registered\n");
 	return 0;
+
+mbox_err:
+	if (acpi_disabled)
+		mbox_free_channel(ctx->mbox_chan);
+	else
+		pcc_mbox_free_channel(ctx->mbox_chan);
+
+	return rc;
 }
 
 static int xgene_slimpro_i2c_remove(struct platform_device *pdev)
@@ -434,7 +565,10 @@ static int xgene_slimpro_i2c_remove(struct platform_device *pdev)
 
 	i2c_del_adapter(&ctx->adapter);
 
-	mbox_free_channel(ctx->mbox_chan);
+	if (acpi_disabled)
+		mbox_free_channel(ctx->mbox_chan);
+	else
+		pcc_mbox_free_channel(ctx->mbox_chan);
 
 	return 0;
 }

diff --git a/drivers/i2c/busses/i2c-xlp9xx.c b/drivers/i2c/busses/i2c-xlp9xx.c
index ae80228..6b106e9 100644
--- a/drivers/i2c/busses/i2c-xlp9xx.c
+++ b/drivers/i2c/busses/i2c-xlp9xx.c

@@ -393,6 +393,7 @@ static int xlp9xx_i2c_probe(struct platform_device *pdev)
 	init_completion(&priv->msg_complete);
 	priv->adapter.dev.parent = &pdev->dev;
 	priv->adapter.algo = &xlp9xx_i2c_algo;
+	priv->adapter.class = I2C_CLASS_HWMON;
 	ACPI_COMPANION_SET(&priv->adapter.dev, ACPI_COMPANION(&pdev->dev));
 	priv->adapter.dev.of_node = pdev->dev.of_node;
 	priv->dev = &pdev->dev;

diff --git a/drivers/i2c/busses/i2c-zx2967.c b/drivers/i2c/busses/i2c-zx2967.c
new file mode 100644
index 0000000..48281c1
--- /dev/null
+++ b/drivers/i2c/busses/i2c-zx2967.c

@@ -0,0 +1,609 @@
+/*
+ * Copyright (C) 2017 Sanechips Technology Co., Ltd.
+ * Copyright 2017 Linaro Ltd.
+ *
+ * Author: Baoyou Xie <baoyou.xie@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#define REG_CMD				0x04
+#define REG_DEVADDR_H			0x0C
+#define REG_DEVADDR_L			0x10
+#define REG_CLK_DIV_FS			0x14
+#define REG_CLK_DIV_HS			0x18
+#define REG_WRCONF			0x1C
+#define REG_RDCONF			0x20
+#define REG_DATA			0x24
+#define REG_STAT			0x28
+
+#define I2C_STOP			0
+#define I2C_MASTER			BIT(0)
+#define I2C_ADDR_MODE_TEN		BIT(1)
+#define I2C_IRQ_MSK_ENABLE		BIT(3)
+#define I2C_RW_READ			BIT(4)
+#define I2C_CMB_RW_EN			BIT(5)
+#define I2C_START			BIT(6)
+
+#define I2C_ADDR_LOW_MASK		GENMASK(6, 0)
+#define I2C_ADDR_LOW_SHIFT		0
+#define I2C_ADDR_HI_MASK		GENMASK(2, 0)
+#define I2C_ADDR_HI_SHIFT		7
+
+#define I2C_WFIFO_RESET			BIT(7)
+#define I2C_RFIFO_RESET			BIT(7)
+
+#define I2C_IRQ_ACK_CLEAR		BIT(7)
+#define I2C_INT_MASK			GENMASK(6, 0)
+
+#define I2C_TRANS_DONE			BIT(0)
+#define I2C_SR_EDEVICE			BIT(1)
+#define I2C_SR_EDATA			BIT(2)
+
+#define I2C_FIFO_MAX			16
+
+#define I2C_TIMEOUT			msecs_to_jiffies(1000)
+
+#define DEV(i2c)			((i2c)->adap.dev.parent)
+
+struct zx2967_i2c {
+	struct i2c_adapter	adap;
+	struct clk		*clk;
+	struct completion	complete;
+	u32			clk_freq;
+	void __iomem		*reg_base;
+	size_t			residue;
+	int			irq;
+	int			msg_rd;
+	u8			*cur_trans;
+	u8			access_cnt;
+	bool			is_suspended;
+	int			error;
+};
+
+static void zx2967_i2c_writel(struct zx2967_i2c *i2c,
+			      u32 val, unsigned long reg)
+{
+	writel_relaxed(val, i2c->reg_base + reg);
+}
+
+static u32 zx2967_i2c_readl(struct zx2967_i2c *i2c, unsigned long reg)
+{
+	return readl_relaxed(i2c->reg_base + reg);
+}
+
+static void zx2967_i2c_writesb(struct zx2967_i2c *i2c,
+			       void *data, unsigned long reg, int len)
+{
+	writesb(i2c->reg_base + reg, data, len);
+}
+
+static void zx2967_i2c_readsb(struct zx2967_i2c *i2c,
+			      void *data, unsigned long reg, int len)
+{
+	readsb(i2c->reg_base + reg, data, len);
+}
+
+static void zx2967_i2c_start_ctrl(struct zx2967_i2c *i2c)
+{
+	u32 status;
+	u32 ctl;
+
+	status = zx2967_i2c_readl(i2c, REG_STAT);
+	status |= I2C_IRQ_ACK_CLEAR;
+	zx2967_i2c_writel(i2c, status, REG_STAT);
+
+	ctl = zx2967_i2c_readl(i2c, REG_CMD);
+	if (i2c->msg_rd)
+		ctl |= I2C_RW_READ;
+	else
+		ctl &= ~I2C_RW_READ;
+	ctl &= ~I2C_CMB_RW_EN;
+	ctl |= I2C_START;
+	zx2967_i2c_writel(i2c, ctl, REG_CMD);
+}
+
+static void zx2967_i2c_flush_fifos(struct zx2967_i2c *i2c)
+{
+	u32 offset;
+	u32 val;
+
+	if (i2c->msg_rd) {
+		offset = REG_RDCONF;
+		val = I2C_RFIFO_RESET;
+	} else {
+		offset = REG_WRCONF;
+		val = I2C_WFIFO_RESET;
+	}
+
+	val |= zx2967_i2c_readl(i2c, offset);
+	zx2967_i2c_writel(i2c, val, offset);
+}
+
+static int zx2967_i2c_empty_rx_fifo(struct zx2967_i2c *i2c, u32 size)
+{
+	u8 val[I2C_FIFO_MAX] = {0};
+	int i;
+
+	if (size > I2C_FIFO_MAX) {
+		dev_err(DEV(i2c), "fifo size %d over the max value %d\n",
+			size, I2C_FIFO_MAX);
+		return -EINVAL;
+	}
+
+	zx2967_i2c_readsb(i2c, val, REG_DATA, size);
+	for (i = 0; i < size; i++) {
+		*i2c->cur_trans++ = val[i];
+		i2c->residue--;
+	}
+
+	barrier();
+
+	return 0;
+}
+
+static int zx2967_i2c_fill_tx_fifo(struct zx2967_i2c *i2c)
+{
+	size_t residue = i2c->residue;
+	u8 *buf = i2c->cur_trans;
+
+	if (residue == 0) {
+		dev_err(DEV(i2c), "residue is %d\n", (int)residue);
+		return -EINVAL;
+	}
+
+	if (residue <= I2C_FIFO_MAX) {
+		zx2967_i2c_writesb(i2c, buf, REG_DATA, residue);
+
+		/* Last chunk is in the FIFO: nothing left to send. */
+		i2c->residue = 0;
+		i2c->cur_trans = NULL;
+	} else {
+		zx2967_i2c_writesb(i2c, buf, REG_DATA, I2C_FIFO_MAX);
+		i2c->residue -= I2C_FIFO_MAX;
+		i2c->cur_trans += I2C_FIFO_MAX;
+	}
+
+	barrier();
+
+	return 0;
+}
+
+static int zx2967_i2c_reset_hardware(struct zx2967_i2c *i2c)
+{
+	u32 val;
+	u32 clk_div;
+
+	val = I2C_MASTER | I2C_IRQ_MSK_ENABLE;
+	zx2967_i2c_writel(i2c, val, REG_CMD);
+
+	clk_div = clk_get_rate(i2c->clk) / i2c->clk_freq - 1;
+	zx2967_i2c_writel(i2c, clk_div, REG_CLK_DIV_FS);
+	zx2967_i2c_writel(i2c, clk_div, REG_CLK_DIV_HS);
+
+	zx2967_i2c_writel(i2c, I2C_FIFO_MAX - 1, REG_WRCONF);
+	zx2967_i2c_writel(i2c, I2C_FIFO_MAX - 1, REG_RDCONF);
+	zx2967_i2c_writel(i2c, 1, REG_RDCONF);
+
+	zx2967_i2c_flush_fifos(i2c);
+
+	return 0;
+}
+
+static void zx2967_i2c_isr_clr(struct zx2967_i2c *i2c)
+{
+	u32 status;
+
+	status = zx2967_i2c_readl(i2c, REG_STAT);
+	status |= I2C_IRQ_ACK_CLEAR;
+	zx2967_i2c_writel(i2c, status, REG_STAT);
+}
+
+static irqreturn_t zx2967_i2c_isr(int irq, void *dev_id)
+{
+	u32 status;
+	struct zx2967_i2c *i2c = (struct zx2967_i2c *)dev_id;
+
+	status = zx2967_i2c_readl(i2c, REG_STAT) & I2C_INT_MASK;
+	zx2967_i2c_isr_clr(i2c);
+
+	if (status & I2C_SR_EDEVICE)
+		i2c->error = -ENXIO;
+	else if (status & I2C_SR_EDATA)
+		i2c->error = -EIO;
+	else if (status & I2C_TRANS_DONE)
+		i2c->error = 0;
+	else
+		goto done;
+
+	complete(&i2c->complete);
+done:
+	return IRQ_HANDLED;
+}
+
+static void zx2967_set_addr(struct zx2967_i2c *i2c, u16 addr)
+{
+	u16 val;
+
+	val = (addr >> I2C_ADDR_LOW_SHIFT) & I2C_ADDR_LOW_MASK;
+	zx2967_i2c_writel(i2c, val, REG_DEVADDR_L);
+
+	val = (addr >> I2C_ADDR_HI_SHIFT) & I2C_ADDR_HI_MASK;
+	zx2967_i2c_writel(i2c, val, REG_DEVADDR_H);
+	if (val)
+		val = zx2967_i2c_readl(i2c, REG_CMD) | I2C_ADDR_MODE_TEN;
+	else
+		val = zx2967_i2c_readl(i2c, REG_CMD) & ~I2C_ADDR_MODE_TEN;
+	zx2967_i2c_writel(i2c, val, REG_CMD);
+}
+
+static int zx2967_i2c_xfer_bytes(struct zx2967_i2c *i2c, u32 bytes)
+{
+	unsigned long time_left;
+	int rd = i2c->msg_rd;
+	int ret;
+
+	reinit_completion(&i2c->complete);
+
+	if (rd) {
+		zx2967_i2c_writel(i2c, bytes - 1, REG_RDCONF);
+	} else {
+		ret = zx2967_i2c_fill_tx_fifo(i2c);
+		if (ret)
+			return ret;
+	}
+
+	zx2967_i2c_start_ctrl(i2c);
+
+	time_left = wait_for_completion_timeout(&i2c->complete,
+						I2C_TIMEOUT);
+	if (time_left == 0)
+		return -ETIMEDOUT;
+
+	if (i2c->error)
+		return i2c->error;
+
+	return rd ? zx2967_i2c_empty_rx_fifo(i2c, bytes) : 0;
+}
+
+static int zx2967_i2c_xfer_msg(struct zx2967_i2c *i2c,
+			       struct i2c_msg *msg)
+{
+	int ret;
+	int i;
+
+	if (msg->len == 0)
+		return -EINVAL;
+
+	zx2967_i2c_flush_fifos(i2c);
+
+	i2c->cur_trans = msg->buf;
+	i2c->residue = msg->len;
+	i2c->access_cnt = msg->len / I2C_FIFO_MAX;
+	i2c->msg_rd = msg->flags & I2C_M_RD;
+
+	for (i = 0; i < i2c->access_cnt; i++) {
+		ret = zx2967_i2c_xfer_bytes(i2c, I2C_FIFO_MAX);
+		if (ret)
+			return ret;
+	}
+
+	if (i2c->residue > 0) {
+		ret = zx2967_i2c_xfer_bytes(i2c, i2c->residue);
+		if (ret)
+			return ret;
+	}
+
+	i2c->residue = 0;
+	i2c->access_cnt = 0;
+
+	return 0;
+}
+
+static int zx2967_i2c_xfer(struct i2c_adapter *adap,
+			   struct i2c_msg *msgs, int num)
+{
+	struct zx2967_i2c *i2c = i2c_get_adapdata(adap);
+	int ret;
+	int i;
+
+	if (i2c->is_suspended)
+		return -EBUSY;
+
+	zx2967_set_addr(i2c, msgs->addr);
+
+	for (i = 0; i < num; i++) {
+		ret = zx2967_i2c_xfer_msg(i2c, &msgs[i]);
+		if (ret)
+			return ret;
+	}
+
+	return num;
+}
+
+static void
+zx2967_smbus_xfer_prepare(struct zx2967_i2c *i2c, u16 addr,
+			  char read_write, u8 command, int size,
+			  union i2c_smbus_data *data)
+{
+	u32 val;
+
+	val = zx2967_i2c_readl(i2c, REG_RDCONF);
+	val |= I2C_RFIFO_RESET;
+	zx2967_i2c_writel(i2c, val, REG_RDCONF);
+	zx2967_set_addr(i2c, addr);
+	val = zx2967_i2c_readl(i2c, REG_CMD);
+	val &= ~I2C_RW_READ;
+	zx2967_i2c_writel(i2c, val, REG_CMD);
+
+	switch (size) {
+	case I2C_SMBUS_BYTE:
+		zx2967_i2c_writel(i2c, command, REG_DATA);
+		break;
+	case I2C_SMBUS_BYTE_DATA:
+		zx2967_i2c_writel(i2c, command, REG_DATA);
+		if (read_write == I2C_SMBUS_WRITE)
+			zx2967_i2c_writel(i2c, data->byte, REG_DATA);
+		break;
+	case I2C_SMBUS_WORD_DATA:
+		zx2967_i2c_writel(i2c, command, REG_DATA);
+		if (read_write == I2C_SMBUS_WRITE) {
+			zx2967_i2c_writel(i2c, (data->word >> 8), REG_DATA);
+			zx2967_i2c_writel(i2c, (data->word & 0xff),
+					  REG_DATA);
+		}
+		break;
+	}
+}
+
+static int zx2967_smbus_xfer_read(struct zx2967_i2c *i2c, int size,
+				  union i2c_smbus_data *data)
+{
+	unsigned long time_left;
+	u8 buf[2];
+	u32 val;
+
+	reinit_completion(&i2c->complete);
+
+	val = zx2967_i2c_readl(i2c, REG_CMD);
+	val |= I2C_CMB_RW_EN;
+	zx2967_i2c_writel(i2c, val, REG_CMD);
+
+	val = zx2967_i2c_readl(i2c, REG_CMD);
+	val |= I2C_START;
+	zx2967_i2c_writel(i2c, val, REG_CMD);
+
+	time_left = wait_for_completion_timeout(&i2c->complete,
+						I2C_TIMEOUT);
+	if (time_left == 0)
+		return -ETIMEDOUT;
+
+	if (i2c->error)
+		return i2c->error;
+
+	switch (size) {
+	case I2C_SMBUS_BYTE:
+	case I2C_SMBUS_BYTE_DATA:
+		val = zx2967_i2c_readl(i2c, REG_DATA);
+		data->byte = val;
+		break;
+	case I2C_SMBUS_WORD_DATA:
+	case I2C_SMBUS_PROC_CALL:
+		buf[0] = zx2967_i2c_readl(i2c, REG_DATA);
+		buf[1] = zx2967_i2c_readl(i2c, REG_DATA);
+		data->word = (buf[0] << 8) | buf[1];
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int zx2967_smbus_xfer_write(struct zx2967_i2c *i2c)
+{
+	unsigned long time_left;
+	u32 val;
+
+	reinit_completion(&i2c->complete);
+	val = zx2967_i2c_readl(i2c, REG_CMD);
+	val |= I2C_START;
+	zx2967_i2c_writel(i2c, val, REG_CMD);
+
+	time_left = wait_for_completion_timeout(&i2c->complete,
+						I2C_TIMEOUT);
+	if (time_left == 0)
+		return -ETIMEDOUT;
+
+	if (i2c->error)
+		return i2c->error;
+
+	return 0;
+}
+
+static int zx2967_smbus_xfer(struct i2c_adapter *adap, u16 addr,
+			     unsigned short flags, char read_write,
+			     u8 command, int size, union i2c_smbus_data *data)
+{
+	struct zx2967_i2c *i2c = i2c_get_adapdata(adap);
+
+	if (size == I2C_SMBUS_QUICK)
+		read_write = I2C_SMBUS_WRITE;
+
+	switch (size) {
+	case I2C_SMBUS_QUICK:
+	case I2C_SMBUS_BYTE:
+	case I2C_SMBUS_BYTE_DATA:
+	case I2C_SMBUS_WORD_DATA:
+		zx2967_smbus_xfer_prepare(i2c, addr, read_write,
+					  command, size, data);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (read_write == I2C_SMBUS_READ)
+		return zx2967_smbus_xfer_read(i2c, size, data);
+
+	return zx2967_smbus_xfer_write(i2c);
+}
+
+static u32 zx2967_i2c_func(struct i2c_adapter *adap)
+{
+	return I2C_FUNC_I2C |
+	       I2C_FUNC_SMBUS_QUICK |
+	       I2C_FUNC_SMBUS_BYTE |
+	       I2C_FUNC_SMBUS_BYTE_DATA |
+	       I2C_FUNC_SMBUS_WORD_DATA |
+	       I2C_FUNC_SMBUS_BLOCK_DATA |
+	       I2C_FUNC_SMBUS_PROC_CALL |
+	       I2C_FUNC_SMBUS_I2C_BLOCK;
+}
+
+static int __maybe_unused zx2967_i2c_suspend(struct device *dev)
+{
+	struct zx2967_i2c *i2c = dev_get_drvdata(dev);
+
+	i2c->is_suspended = true;
+	clk_disable_unprepare(i2c->clk);
+
+	return 0;
+}
+
+static int __maybe_unused zx2967_i2c_resume(struct device *dev)
+{
+	struct zx2967_i2c *i2c = dev_get_drvdata(dev);
+
+	i2c->is_suspended = false;
+	clk_prepare_enable(i2c->clk);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(zx2967_i2c_dev_pm_ops,
+			 zx2967_i2c_suspend, zx2967_i2c_resume);
+
+static const struct i2c_algorithm zx2967_i2c_algo = {
+	.master_xfer = zx2967_i2c_xfer,
+	.smbus_xfer = zx2967_smbus_xfer,
+	.functionality = zx2967_i2c_func,
+};
+
+static const struct of_device_id zx2967_i2c_of_match[] = {
+	{ .compatible = "zte,zx296718-i2c", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, zx2967_i2c_of_match);
+
+static int zx2967_i2c_probe(struct platform_device *pdev)
+{
+	struct zx2967_i2c *i2c;
+	void __iomem *reg_base;
+	struct resource *res;
+	struct clk *clk;
+	int ret;
+
+	i2c = devm_kzalloc(&pdev->dev, sizeof(*i2c), GFP_KERNEL);
+	if (!i2c)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	reg_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(reg_base))
+		return PTR_ERR(reg_base);
+
+	clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "missing controller clock");
+		return PTR_ERR(clk);
+	}
+
+	ret = clk_prepare_enable(clk);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to enable i2c_clk\n");
+		return ret;
+	}
+
+	ret = device_property_read_u32(&pdev->dev, "clock-frequency",
+				       &i2c->clk_freq);
+	if (ret) {
+		dev_err(&pdev->dev, "missing clock-frequency");
+		return ret;
+	}
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0)
+		return ret;
+
+	i2c->irq = ret;
+	i2c->reg_base = reg_base;
+	i2c->clk = clk;
+
+	init_completion(&i2c->complete);
+	platform_set_drvdata(pdev, i2c);
+
+	ret = zx2967_i2c_reset_hardware(i2c);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to initialize i2c controller\n");
+		goto err_clk_unprepare;
+	}
+
+	ret = devm_request_irq(&pdev->dev, i2c->irq,
+			zx2967_i2c_isr, 0, dev_name(&pdev->dev), i2c);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to request irq %i\n", i2c->irq);
+		goto err_clk_unprepare;
+	}
+
+	i2c_set_adapdata(&i2c->adap, i2c);
+	strlcpy(i2c->adap.name, "zx2967 i2c adapter",
+		sizeof(i2c->adap.name));
+	i2c->adap.algo = &zx2967_i2c_algo;
+	i2c->adap.nr = pdev->id;
+	i2c->adap.dev.parent = &pdev->dev;
+	i2c->adap.dev.of_node = pdev->dev.of_node;
+
+	ret = i2c_add_numbered_adapter(&i2c->adap);
+	if (ret)
+		goto err_clk_unprepare;
+
+	return 0;
+
+err_clk_unprepare:
+	clk_disable_unprepare(i2c->clk);
+	return ret;
+}
+
+static int zx2967_i2c_remove(struct platform_device *pdev)
+{
+	struct zx2967_i2c *i2c = platform_get_drvdata(pdev);
+
+	i2c_del_adapter(&i2c->adap);
+	clk_disable_unprepare(i2c->clk);
+
+	return 0;
+}
+
+static struct platform_driver zx2967_i2c_driver = {
+	.probe	= zx2967_i2c_probe,
+	.remove	= zx2967_i2c_remove,
+	.driver	= {
+		.name  = "zx2967_i2c",
+		.of_match_table = zx2967_i2c_of_match,
+		.pm = &zx2967_i2c_dev_pm_ops,
+	},
+};
+module_platform_driver(zx2967_i2c_driver);
+
+MODULE_AUTHOR("Baoyou Xie <baoyou.xie@linaro.org>");
+MODULE_DESCRIPTION("ZTE ZX2967 I2C Bus Controller driver");
+MODULE_LICENSE("GPL v2");

diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
new file mode 100644
index 0000000..4842ec3
--- /dev/null
+++ b/drivers/i2c/i2c-core-acpi.c

@@ -0,0 +1,665 @@
+/*
+ * Linux I2C core ACPI support code
+ *
+ * Copyright (C) 2014 Intel Corp, Author: Lan Tianyu <tianyu.lan@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include "i2c-core.h"
+
+struct i2c_acpi_handler_data {
+	struct acpi_connection_info info;
+	struct i2c_adapter *adapter;
+};
+
+struct gsb_buffer {
+	u8	status;
+	u8	len;
+	union {
+		u16	wdata;
+		u8	bdata;
+		u8	data[0];
+	};
+} __packed;
+
+struct i2c_acpi_lookup {
+	struct i2c_board_info *info;
+	acpi_handle adapter_handle;
+	acpi_handle device_handle;
+	acpi_handle search_handle;
+	int n;
+	int index;
+	u32 speed;
+	u32 min_speed;
+};
+
+static int i2c_acpi_fill_info(struct acpi_resource *ares, void *data)
+{
+	struct i2c_acpi_lookup *lookup = data;
+	struct i2c_board_info *info = lookup->info;
+	struct acpi_resource_i2c_serialbus *sb;
+	acpi_status status;
+
+	if (info->addr || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
+		return 1;
+
+	sb = &ares->data.i2c_serial_bus;
+	if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C)
+		return 1;
+
+	if (lookup->index != -1 && lookup->n++ != lookup->index)
+		return 1;
+
+	status = acpi_get_handle(lookup->device_handle,
+				 sb->resource_source.string_ptr,
+				 &lookup->adapter_handle);
+	if (!ACPI_SUCCESS(status))
+		return 1;
+
+	info->addr = sb->slave_address;
+	lookup->speed = sb->connection_speed;
+	if (sb->access_mode == ACPI_I2C_10BIT_MODE)
+		info->flags |= I2C_CLIENT_TEN;
+
+	return 1;
+}
+
+static const struct acpi_device_id i2c_acpi_ignored_device_ids[] = {
+	/*
+	 * ACPI video acpi_devices, which are handled by the acpi-video driver,
+	 * sometimes contain a SERIAL_TYPE_I2C ACPI resource; ignore these.
+	 */
+	{ ACPI_VIDEO_HID, 0 },
+	{}
+};
+
+static int i2c_acpi_do_lookup(struct acpi_device *adev,
+			      struct i2c_acpi_lookup *lookup)
+{
+	struct i2c_board_info *info = lookup->info;
+	struct list_head resource_list;
+	int ret;
+
+	if (acpi_bus_get_status(adev) || !adev->status.present ||
+	    acpi_device_enumerated(adev))
+		return -EINVAL;
+
+	if (acpi_match_device_ids(adev, i2c_acpi_ignored_device_ids) == 0)
+		return -ENODEV;
+
+	memset(info, 0, sizeof(*info));
+	lookup->device_handle = acpi_device_handle(adev);
+
+	/* Look up the I2cSerialBus resource */
+	INIT_LIST_HEAD(&resource_list);
+	ret = acpi_dev_get_resources(adev, &resource_list,
+				     i2c_acpi_fill_info, lookup);
+	acpi_dev_free_resource_list(&resource_list);
+
+	if (ret < 0 || !info->addr)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int i2c_acpi_get_info(struct acpi_device *adev,
+			     struct i2c_board_info *info,
+			     struct i2c_adapter *adapter,
+			     acpi_handle *adapter_handle)
+{
+	struct list_head resource_list;
+	struct resource_entry *entry;
+	struct i2c_acpi_lookup lookup;
+	int ret;
+
+	memset(&lookup, 0, sizeof(lookup));
+	lookup.info = info;
+	lookup.index = -1;
+
+	ret = i2c_acpi_do_lookup(adev, &lookup);
+	if (ret)
+		return ret;
+
+	if (adapter) {
+		/* The adapter must match the one in I2cSerialBus() connector */
+		if (ACPI_HANDLE(&adapter->dev) != lookup.adapter_handle)
+			return -ENODEV;
+	} else {
+		struct acpi_device *adapter_adev;
+
+		/* The adapter must be present */
+		if (acpi_bus_get_device(lookup.adapter_handle, &adapter_adev))
+			return -ENODEV;
+		if (acpi_bus_get_status(adapter_adev) ||
+		    !adapter_adev->status.present)
+			return -ENODEV;
+	}
+
+	info->fwnode = acpi_fwnode_handle(adev);
+	if (adapter_handle)
+		*adapter_handle = lookup.adapter_handle;
+
+	/* Then fill IRQ number if any */
+	INIT_LIST_HEAD(&resource_list);
+	ret = acpi_dev_get_resources(adev, &resource_list, NULL, NULL);
+	if (ret < 0)
+		return -EINVAL;
+
+	resource_list_for_each_entry(entry, &resource_list) {
+		if (resource_type(entry->res) == IORESOURCE_IRQ) {
+			info->irq = entry->res->start;
+			break;
+		}
+	}
+
+	acpi_dev_free_resource_list(&resource_list);
+
+	acpi_set_modalias(adev, dev_name(&adev->dev), info->type,
+			  sizeof(info->type));
+
+	return 0;
+}
+
+static void i2c_acpi_register_device(struct i2c_adapter *adapter,
+				     struct acpi_device *adev,
+				     struct i2c_board_info *info)
+{
+	adev->power.flags.ignore_parent = true;
+	acpi_device_set_enumerated(adev);
+
+	if (!i2c_new_device(adapter, info)) {
+		adev->power.flags.ignore_parent = false;
+		dev_err(&adapter->dev,
+			"failed to add I2C device %s from ACPI\n",
+			dev_name(&adev->dev));
+	}
+}
+
+static acpi_status i2c_acpi_add_device(acpi_handle handle, u32 level,
+				       void *data, void **return_value)
+{
+	struct i2c_adapter *adapter = data;
+	struct acpi_device *adev;
+	struct i2c_board_info info;
+
+	if (acpi_bus_get_device(handle, &adev))
+		return AE_OK;
+
+	if (i2c_acpi_get_info(adev, &info, adapter, NULL))
+		return AE_OK;
+
+	i2c_acpi_register_device(adapter, adev, &info);
+
+	return AE_OK;
+}
+
+#define I2C_ACPI_MAX_SCAN_DEPTH 32
+
+/**
+ * i2c_acpi_register_devices - enumerate I2C slave devices behind adapter
+ * @adap: pointer to adapter
+ *
+ * Enumerate all I2C slave devices behind this adapter by walking the ACPI
+ * namespace. When a device is found it will be added to the Linux device
+ * model and bound to the corresponding ACPI handle.
+ */
+void i2c_acpi_register_devices(struct i2c_adapter *adap)
+{
+	acpi_status status;
+
+	if (!has_acpi_companion(&adap->dev))
+		return;
+
+	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
+				     I2C_ACPI_MAX_SCAN_DEPTH,
+				     i2c_acpi_add_device, NULL,
+				     adap, NULL);
+	if (ACPI_FAILURE(status))
+		dev_warn(&adap->dev, "failed to enumerate I2C slaves\n");
+}
+
+static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level,
+					   void *data, void **return_value)
+{
+	struct i2c_acpi_lookup *lookup = data;
+	struct acpi_device *adev;
+
+	if (acpi_bus_get_device(handle, &adev))
+		return AE_OK;
+
+	if (i2c_acpi_do_lookup(adev, lookup))
+		return AE_OK;
+
+	if (lookup->search_handle != lookup->adapter_handle)
+		return AE_OK;
+
+	if (lookup->speed <= lookup->min_speed)
+		lookup->min_speed = lookup->speed;
+
+	return AE_OK;
+}
+
+/**
+ * i2c_acpi_find_bus_speed - find I2C bus speed from ACPI
+ * @dev: The device owning the bus
+ *
+ * Find the I2C bus speed by walking the ACPI namespace for all I2C slave
+ * devices connected to this bus and use the speed of the slowest device.
+ *
+ * Returns the speed in Hz or zero
+ */
+u32 i2c_acpi_find_bus_speed(struct device *dev)
+{
+	struct i2c_acpi_lookup lookup;
+	struct i2c_board_info dummy;
+	acpi_status status;
+
+	if (!has_acpi_companion(dev))
+		return 0;
+
+	memset(&lookup, 0, sizeof(lookup));
+	lookup.search_handle = ACPI_HANDLE(dev);
+	lookup.min_speed = UINT_MAX;
+	lookup.info = &dummy;
+	lookup.index = -1;
+
+	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
+				     I2C_ACPI_MAX_SCAN_DEPTH,
+				     i2c_acpi_lookup_speed, NULL,
+				     &lookup, NULL);
+
+	if (ACPI_FAILURE(status)) {
+		dev_warn(dev, "unable to find I2C bus speed from ACPI\n");
+		return 0;
+	}
+
+	return lookup.min_speed != UINT_MAX ? lookup.min_speed : 0;
+}
+EXPORT_SYMBOL_GPL(i2c_acpi_find_bus_speed);
+
+static int i2c_acpi_match_adapter(struct device *dev, void *data)
+{
+	struct i2c_adapter *adapter = i2c_verify_adapter(dev);
+
+	if (!adapter)
+		return 0;
+
+	return ACPI_HANDLE(dev) == (acpi_handle)data;
+}
+
+static int i2c_acpi_match_device(struct device *dev, void *data)
+{
+	return ACPI_COMPANION(dev) == data;
+}
+
+static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
+{
+	struct device *dev;
+
+	dev = bus_find_device(&i2c_bus_type, NULL, handle,
+			      i2c_acpi_match_adapter);
+	return dev ? i2c_verify_adapter(dev) : NULL;
+}
+
+static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev)
+{
+	struct device *dev;
+
+	dev = bus_find_device(&i2c_bus_type, NULL, adev, i2c_acpi_match_device);
+	return dev ? i2c_verify_client(dev) : NULL;
+}
+
+static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value,
+			   void *arg)
+{
+	struct acpi_device *adev = arg;
+	struct i2c_board_info info;
+	acpi_handle adapter_handle;
+	struct i2c_adapter *adapter;
+	struct i2c_client *client;
+
+	switch (value) {
+	case ACPI_RECONFIG_DEVICE_ADD:
+		if (i2c_acpi_get_info(adev, &info, NULL, &adapter_handle))
+			break;
+
+		adapter = i2c_acpi_find_adapter_by_handle(adapter_handle);
+		if (!adapter)
+			break;
+
+		i2c_acpi_register_device(adapter, adev, &info);
+		break;
+	case ACPI_RECONFIG_DEVICE_REMOVE:
+		if (!acpi_device_enumerated(adev))
+			break;
+
+		client = i2c_acpi_find_client_by_adev(adev);
+		if (!client)
+			break;
+
+		i2c_unregister_device(client);
+		put_device(&client->dev);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+struct notifier_block i2c_acpi_notifier = {
+	.notifier_call = i2c_acpi_notify,
+};
+
+/**
+ * i2c_acpi_new_device - Create i2c-client for the Nth I2cSerialBus resource
+ * @dev:     Device owning the ACPI resources to get the client from
+ * @index:   Index of ACPI resource to get
+ * @info:    describes the I2C device; note this is modified (addr gets set)
+ * Context: can sleep
+ *
+ * By default the i2c subsys creates an i2c-client for the first I2cSerialBus
+ * resource of an acpi_device, but some acpi_devices have multiple I2cSerialBus
+ * resources, in that case this function can be used to create an i2c-client
+ * for other I2cSerialBus resources in the Current Resource Settings table.
+ *
+ * Also see i2c_new_device, which this function calls to create the i2c-client.
+ *
+ * Returns a pointer to the new i2c-client, or NULL if the adapter is not found.
+ */
+struct i2c_client *i2c_acpi_new_device(struct device *dev, int index,
+				       struct i2c_board_info *info)
+{
+	struct i2c_acpi_lookup lookup;
+	struct i2c_adapter *adapter;
+	struct acpi_device *adev;
+	LIST_HEAD(resource_list);
+	int ret;
+
+	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return NULL;
+
+	memset(&lookup, 0, sizeof(lookup));
+	lookup.info = info;
+	lookup.device_handle = acpi_device_handle(adev);
+	lookup.index = index;
+
+	ret = acpi_dev_get_resources(adev, &resource_list,
+				     i2c_acpi_fill_info, &lookup);
+	acpi_dev_free_resource_list(&resource_list);
+
+	if (ret < 0 || !info->addr)
+		return NULL;
+
+	adapter = i2c_acpi_find_adapter_by_handle(lookup.adapter_handle);
+	if (!adapter)
+		return NULL;
+
+	return i2c_new_device(adapter, info);
+}
+EXPORT_SYMBOL_GPL(i2c_acpi_new_device);
+
+#ifdef CONFIG_ACPI_I2C_OPREGION
+static int acpi_gsb_i2c_read_bytes(struct i2c_client *client,
+		u8 cmd, u8 *data, u8 data_len)
+{
+
+	struct i2c_msg msgs[2];
+	int ret;
+	u8 *buffer;
+
+	buffer = kzalloc(data_len, GFP_KERNEL);
+	if (!buffer)
+		return AE_NO_MEMORY;
+
+	msgs[0].addr = client->addr;
+	msgs[0].flags = client->flags;
+	msgs[0].len = 1;
+	msgs[0].buf = &cmd;
+
+	msgs[1].addr = client->addr;
+	msgs[1].flags = client->flags | I2C_M_RD;
+	msgs[1].len = data_len;
+	msgs[1].buf = buffer;
+
+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (ret < 0)
+		dev_err(&client->adapter->dev, "i2c read failed\n");
+	else
+		memcpy(data, buffer, data_len);
+
+	kfree(buffer);
+	return ret;
+}
+
+static int acpi_gsb_i2c_write_bytes(struct i2c_client *client,
+		u8 cmd, u8 *data, u8 data_len)
+{
+
+	struct i2c_msg msgs[1];
+	u8 *buffer;
+	int ret = AE_OK;
+
+	buffer = kzalloc(data_len + 1, GFP_KERNEL);
+	if (!buffer)
+		return AE_NO_MEMORY;
+
+	buffer[0] = cmd;
+	memcpy(buffer + 1, data, data_len);
+
+	msgs[0].addr = client->addr;
+	msgs[0].flags = client->flags;
+	msgs[0].len = data_len + 1;
+	msgs[0].buf = buffer;
+
+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (ret < 0)
+		dev_err(&client->adapter->dev, "i2c write failed\n");
+
+	kfree(buffer);
+	return ret;
+}
+
+static acpi_status
+i2c_acpi_space_handler(u32 function, acpi_physical_address command,
+			u32 bits, u64 *value64,
+			void *handler_context, void *region_context)
+{
+	struct gsb_buffer *gsb = (struct gsb_buffer *)value64;
+	struct i2c_acpi_handler_data *data = handler_context;
+	struct acpi_connection_info *info = &data->info;
+	struct acpi_resource_i2c_serialbus *sb;
+	struct i2c_adapter *adapter = data->adapter;
+	struct i2c_client *client;
+	struct acpi_resource *ares;
+	u32 accessor_type = function >> 16;
+	u8 action = function & ACPI_IO_MASK;
+	acpi_status ret;
+	int status;
+
+	ret = acpi_buffer_to_resource(info->connection, info->length, &ares);
+	if (ACPI_FAILURE(ret))
+		return ret;
+
+	client = kzalloc(sizeof(*client), GFP_KERNEL);
+	if (!client) {
+		ret = AE_NO_MEMORY;
+		goto err;
+	}
+
+	if (!value64 || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) {
+		ret = AE_BAD_PARAMETER;
+		goto err;
+	}
+
+	sb = &ares->data.i2c_serial_bus;
+	if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C) {
+		ret = AE_BAD_PARAMETER;
+		goto err;
+	}
+
+	client->adapter = adapter;
+	client->addr = sb->slave_address;
+
+	if (sb->access_mode == ACPI_I2C_10BIT_MODE)
+		client->flags |= I2C_CLIENT_TEN;
+
+	switch (accessor_type) {
+	case ACPI_GSB_ACCESS_ATTRIB_SEND_RCV:
+		if (action == ACPI_READ) {
+			status = i2c_smbus_read_byte(client);
+			if (status >= 0) {
+				gsb->bdata = status;
+				status = 0;
+			}
+		} else {
+			status = i2c_smbus_write_byte(client, gsb->bdata);
+		}
+		break;
+
+	case ACPI_GSB_ACCESS_ATTRIB_BYTE:
+		if (action == ACPI_READ) {
+			status = i2c_smbus_read_byte_data(client, command);
+			if (status >= 0) {
+				gsb->bdata = status;
+				status = 0;
+			}
+		} else {
+			status = i2c_smbus_write_byte_data(client, command,
+					gsb->bdata);
+		}
+		break;
+
+	case ACPI_GSB_ACCESS_ATTRIB_WORD:
+		if (action == ACPI_READ) {
+			status = i2c_smbus_read_word_data(client, command);
+			if (status >= 0) {
+				gsb->wdata = status;
+				status = 0;
+			}
+		} else {
+			status = i2c_smbus_write_word_data(client, command,
+					gsb->wdata);
+		}
+		break;
+
+	case ACPI_GSB_ACCESS_ATTRIB_BLOCK:
+		if (action == ACPI_READ) {
+			status = i2c_smbus_read_block_data(client, command,
+					gsb->data);
+			if (status >= 0) {
+				gsb->len = status;
+				status = 0;
+			}
+		} else {
+			status = i2c_smbus_write_block_data(client, command,
+					gsb->len, gsb->data);
+		}
+		break;
+
+	case ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE:
+		if (action == ACPI_READ) {
+			status = acpi_gsb_i2c_read_bytes(client, command,
+					gsb->data, info->access_length);
+			if (status > 0)
+				status = 0;
+		} else {
+			status = acpi_gsb_i2c_write_bytes(client, command,
+					gsb->data, info->access_length);
+		}
+		break;
+
+	default:
+		dev_warn(&adapter->dev, "protocol 0x%02x not supported for client 0x%02x\n",
+			 accessor_type, client->addr);
+		ret = AE_BAD_PARAMETER;
+		goto err;
+	}
+
+	gsb->status = status;
+
+ err:
+	kfree(client);
+	ACPI_FREE(ares);
+	return ret;
+}
+
+
+int i2c_acpi_install_space_handler(struct i2c_adapter *adapter)
+{
+	acpi_handle handle;
+	struct i2c_acpi_handler_data *data;
+	acpi_status status;
+
+	if (!adapter->dev.parent)
+		return -ENODEV;
+
+	handle = ACPI_HANDLE(adapter->dev.parent);
+
+	if (!handle)
+		return -ENODEV;
+
+	data = kzalloc(sizeof(struct i2c_acpi_handler_data),
+			    GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->adapter = adapter;
+	status = acpi_bus_attach_private_data(handle, (void *)data);
+	if (ACPI_FAILURE(status)) {
+		kfree(data);
+		return -ENOMEM;
+	}
+
+	status = acpi_install_address_space_handler(handle,
+				ACPI_ADR_SPACE_GSBUS,
+				&i2c_acpi_space_handler,
+				NULL,
+				data);
+	if (ACPI_FAILURE(status)) {
+		dev_err(&adapter->dev, "Error installing i2c space handler\n");
+		acpi_bus_detach_private_data(handle);
+		kfree(data);
+		return -ENOMEM;
+	}
+
+	acpi_walk_dep_device_list(handle);
+	return 0;
+}
+
+void i2c_acpi_remove_space_handler(struct i2c_adapter *adapter)
+{
+	acpi_handle handle;
+	struct i2c_acpi_handler_data *data;
+	acpi_status status;
+
+	if (!adapter->dev.parent)
+		return;
+
+	handle = ACPI_HANDLE(adapter->dev.parent);
+
+	if (!handle)
+		return;
+
+	acpi_remove_address_space_handler(handle,
+				ACPI_ADR_SPACE_GSBUS,
+				&i2c_acpi_space_handler);
+
+	status = acpi_bus_get_private_data(handle, (void **)&data);
+	if (ACPI_SUCCESS(status))
+		kfree(data);
+
+	acpi_bus_detach_private_data(handle);
+}
+#endif /* CONFIG_ACPI_I2C_OPREGION */

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
new file mode 100644
index 0000000..c89dac7
--- /dev/null
+++ b/drivers/i2c/i2c-core-base.c

@@ -0,0 +1,2245 @@
+/*
+ * Linux I2C core
+ *
+ * Copyright (C) 1995-99 Simon G. Vogl
+ *   With some changes from Kyösti Mälkki <kmalkki@cc.hut.fi>
+ *   Mux support by Rodolfo Giometti <giometti@enneenne.com> and
+ *   Michael Lawnick <michael.lawnick.ext@nsn.com>
+ *
+ * Copyright (C) 2013-2017 Wolfram Sang <wsa@the-dreams.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "i2c-core: " fmt
+
+#include <dt-bindings/i2c/i2c.h>
+#include <linux/acpi.h>
+#include <linux/clk/clk-conf.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/irqflags.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
+#include <linux/pm_wakeirq.h>
+#include <linux/property.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+
+#include "i2c-core.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/i2c.h>
+
+#define I2C_ADDR_OFFSET_TEN_BIT	0xa000
+#define I2C_ADDR_OFFSET_SLAVE	0x1000
+
+#define I2C_ADDR_7BITS_MAX	0x77
+#define I2C_ADDR_7BITS_COUNT	(I2C_ADDR_7BITS_MAX + 1)
+
+/*
+ * core_lock protects i2c_adapter_idr, and guarantees that device detection,
+ * deletion of detected devices, and attach_adapter calls are serialized
+ */
+static DEFINE_MUTEX(core_lock);
+static DEFINE_IDR(i2c_adapter_idr);
+
+static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver);
+
+static struct static_key i2c_trace_msg = STATIC_KEY_INIT_FALSE;
+static bool is_registered;
+
+/*
+ * Take one reference on the i2c_trace_msg static key.
+ * Always returns 0.
+ */
+int i2c_transfer_trace_reg(void)
+{
+	static_key_slow_inc(&i2c_trace_msg);
+	return 0;
+}
+
+/*
+ * Drop one reference on the i2c_trace_msg static key; pairs with
+ * i2c_transfer_trace_reg().
+ */
+void i2c_transfer_trace_unreg(void)
+{
+	static_key_slow_dec(&i2c_trace_msg);
+}
+
+/*
+ * Look up @client by name in the device ID table @id.
+ *
+ * The table is terminated by an entry whose name is empty.  Returns the
+ * first entry whose name equals client->name, or NULL when there is no
+ * match or when either argument is NULL.
+ */
+const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id,
+						const struct i2c_client *client)
+{
+	const struct i2c_device_id *entry;
+
+	if (!id || !client)
+		return NULL;
+
+	for (entry = id; entry->name[0]; entry++) {
+		if (!strcmp(client->name, entry->name))
+			return entry;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(i2c_match_id);
+
+/*
+ * Bus match callback: returns 1 when @drv can handle @dev, 0 otherwise.
+ * Matching is tried in order: OF match table, ACPI, then the driver's
+ * I2C id_table.
+ */
+static int i2c_device_match(struct device *dev, struct device_driver *drv)
+{
+	struct i2c_client *client = i2c_verify_client(dev);
+
+	/* OF style match first */
+	if (i2c_of_match_device(drv->of_match_table, client))
+		return 1;
+
+	/* ... then ACPI style match */
+	if (acpi_driver_match_device(dev, drv))
+		return 1;
+
+	/* ... and finally the plain I2C name table */
+	return i2c_match_id(to_i2c_driver(drv)->id_table, client) ? 1 : 0;
+}
+
+/*
+ * uevent callback for I2C clients: emit the ACPI modalias when one is
+ * available, otherwise a MODALIAS built from I2C_MODULE_PREFIX and the
+ * client name.
+ */
+static int i2c_device_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int rc = acpi_device_uevent_modalias(dev, env);
+
+	/* -ENODEV is the only result that asks for the I2C fallback */
+	if (rc == -ENODEV)
+		rc = add_uevent_var(env, "MODALIAS=%s%s", I2C_MODULE_PREFIX,
+				    client->name);
+
+	return rc;
+}
+
+/* i2c bus recovery routines */
+/* Read the current level of the SCL recovery GPIO of @adap */
+static int get_scl_gpio_value(struct i2c_adapter *adap)
+{
+	struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
+
+	return gpio_get_value(bri->scl_gpio);
+}
+
+/* Drive the SCL recovery GPIO of @adap to @val */
+static void set_scl_gpio_value(struct i2c_adapter *adap, int val)
+{
+	struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
+
+	gpio_set_value(bri->scl_gpio, val);
+}
+
+/* Read the current level of the SDA recovery GPIO of @adap */
+static int get_sda_gpio_value(struct i2c_adapter *adap)
+{
+	struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
+
+	return gpio_get_value(bri->sda_gpio);
+}
+
+/*
+ * Claim the GPIOs used for bus recovery.
+ *
+ * SCL is mandatory: failing to request it is returned to the caller.  SDA
+ * is only requested when a get_sda callback is set; if that request fails,
+ * get_sda is cleared and recovery proceeds without SDA polling.
+ */
+static int i2c_get_gpios_for_recovery(struct i2c_adapter *adap)
+{
+	struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
+	int err;
+
+	err = gpio_request_one(bri->scl_gpio,
+			       GPIOF_OPEN_DRAIN | GPIOF_OUT_INIT_HIGH,
+			       "i2c-scl");
+	if (err) {
+		dev_warn(&adap->dev, "Can't get SCL gpio: %d\n", bri->scl_gpio);
+		return err;
+	}
+
+	/* SDA is optional: fall back to working without SDA polling */
+	if (bri->get_sda &&
+	    gpio_request_one(bri->sda_gpio, GPIOF_IN, "i2c-sda")) {
+		dev_warn(&adap->dev,
+			 "Can't get SDA gpio: %d. Not using SDA polling\n",
+			 bri->sda_gpio);
+		bri->get_sda = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Release the recovery GPIOs.  SDA is freed only while get_sda is still
+ * set; SCL is always freed.
+ */
+static void i2c_put_gpios_for_recovery(struct i2c_adapter *adap)
+{
+	struct i2c_bus_recovery_info *info = adap->bus_recovery_info;
+	bool have_sda = info->get_sda != NULL;
+
+	if (have_sda)
+		gpio_free(info->sda_gpio);
+
+	gpio_free(info->scl_gpio);
+}
+
+/*
+ * We are generating clock pulses. ndelay() determines the duration of each
+ * clock level. We generate a clock at a rate of 100 kHz, so the duration of
+ * each clock level is: delay in ns = (10^6 / 100) / 2
+ */
+#define RECOVERY_NDELAY		5000
+#define RECOVERY_CLK_CNT	9
+
+/*
+ * Bit-bang SCL through the bus_recovery_info callbacks of @adap.
+ *
+ * SCL is first driven high, then toggled up to RECOVERY_CLK_CNT * 2 times
+ * with RECOVERY_NDELAY between level changes.  Each time SCL was last
+ * driven high, the loop stops early if get_sda() (when provided) reports
+ * SDA high, or fails with -EBUSY if get_scl() reads SCL low.
+ *
+ * The optional prepare_recovery()/unprepare_recovery() callbacks bracket
+ * the sequence.  Returns 0, or -EBUSY when SCL is stuck low.
+ */
+static int i2c_generic_recovery(struct i2c_adapter *adap)
+{
+	struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
+	int i = 0, val = 1, ret = 0;
+
+	if (bri->prepare_recovery)
+		bri->prepare_recovery(adap);
+
+	bri->set_scl(adap, val);
+	ndelay(RECOVERY_NDELAY);
+
+	/*
+	 * By this time SCL is high, as we need to give 9 falling-rising edges
+	 */
+	while (i++ < RECOVERY_CLK_CNT * 2) {
+		/* val holds the level SCL was last driven to */
+		if (val) {
+			/* Break if SDA is high */
+			if (bri->get_sda && bri->get_sda(adap))
+					break;
+			/* SCL shouldn't be low here */
+			if (!bri->get_scl(adap)) {
+				dev_err(&adap->dev,
+					"SCL is stuck low, exit recovery\n");
+				ret = -EBUSY;
+				break;
+			}
+		}
+
+		/* Flip SCL and hold the new level for half a clock period */
+		val = !val;
+		bri->set_scl(adap, val);
+		ndelay(RECOVERY_NDELAY);
+	}
+
+	if (bri->unprepare_recovery)
+		bri->unprepare_recovery(adap);
+
+	return ret;
+}
+
+/*
+ * Exported recover_bus implementation that runs i2c_generic_recovery()
+ * directly, using the get_scl/set_scl callbacks already present in the
+ * adapter's bus_recovery_info.
+ */
+int i2c_generic_scl_recovery(struct i2c_adapter *adap)
+{
+	return i2c_generic_recovery(adap);
+}
+EXPORT_SYMBOL_GPL(i2c_generic_scl_recovery);
+
+/*
+ * Exported recover_bus implementation for GPIO based recovery: claim the
+ * recovery GPIOs, run i2c_generic_recovery() and release the GPIOs again.
+ * Returns the GPIO request error if claiming fails, otherwise the result of
+ * the recovery itself.
+ */
+int i2c_generic_gpio_recovery(struct i2c_adapter *adap)
+{
+	int ret = i2c_get_gpios_for_recovery(adap);
+
+	if (!ret) {
+		ret = i2c_generic_recovery(adap);
+		i2c_put_gpios_for_recovery(adap);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i2c_generic_gpio_recovery);
+
+/*
+ * Run the adapter's recover_bus() callback.
+ * Returns -EOPNOTSUPP when the adapter has no bus_recovery_info, otherwise
+ * whatever the callback returns.
+ */
+int i2c_recover_bus(struct i2c_adapter *adap)
+{
+	struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
+
+	if (!bri)
+		return -EOPNOTSUPP;
+
+	dev_dbg(&adap->dev, "Trying i2c bus recovery\n");
+
+	return bri->recover_bus(adap);
+}
+EXPORT_SYMBOL_GPL(i2c_recover_bus);
+
+/*
+ * Validate and complete the adapter's bus_recovery_info.
+ *
+ * For i2c_generic_gpio_recovery the SCL GPIO must be valid; the GPIO based
+ * get_scl/set_scl helpers are installed, and get_sda is set only when the
+ * SDA GPIO is valid.  For i2c_generic_scl_recovery both set_scl and get_scl
+ * must already be provided.  On any inconsistency an error is logged and
+ * recovery is disabled by clearing adap->bus_recovery_info.
+ */
+static void i2c_init_recovery(struct i2c_adapter *adap)
+{
+	struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
+	const char *reason = NULL;
+
+	if (!bri)
+		return;
+
+	if (!bri->recover_bus) {
+		reason = "no recover_bus() found";
+	} else if (bri->recover_bus == i2c_generic_gpio_recovery) {
+		/* Generic GPIO recovery */
+		if (gpio_is_valid(bri->scl_gpio)) {
+			bri->get_sda = gpio_is_valid(bri->sda_gpio) ?
+					get_sda_gpio_value : NULL;
+			bri->get_scl = get_scl_gpio_value;
+			bri->set_scl = set_scl_gpio_value;
+		} else {
+			reason = "invalid SCL gpio";
+		}
+	} else if (bri->recover_bus == i2c_generic_scl_recovery) {
+		/* Generic SCL recovery */
+		if (!bri->set_scl || !bri->get_scl)
+			reason = "no {get|set}_scl() found";
+	}
+
+	if (!reason)
+		return;
+
+	dev_err(&adap->dev, "Not using recovery: %s\n", reason);
+	adap->bus_recovery_info = NULL;
+}
+
+/*
+ * Return the Host Notify IRQ for @client, creating the mapping in the
+ * adapter's host_notify_domain if none exists yet.
+ *
+ * Returns -ENXIO when the adapter has no Host Notify domain or no mapping
+ * could be obtained, and -EINVAL for ten-bit clients.
+ */
+static int i2c_smbus_host_notify_to_irq(const struct i2c_client *client)
+{
+	struct irq_domain *domain = client->adapter->host_notify_domain;
+	unsigned int virq;
+
+	if (!domain)
+		return -ENXIO;
+
+	if (client->flags & I2C_CLIENT_TEN)
+		return -EINVAL;
+
+	/* Reuse an existing mapping, create one only when needed */
+	virq = irq_find_mapping(domain, client->addr);
+	if (!virq)
+		virq = irq_create_mapping(domain, client->addr);
+
+	if (!virq)
+		return -ENXIO;
+
+	return virq;
+}
+
+/*
+ * Bus probe callback for I2C clients.
+ *
+ * In order: resolve the client IRQ if none is set (Host Notify, OF or ACPI
+ * GPIO), verify that the driver has an id_table or an OF match, set up the
+ * wakeup IRQ for I2C_CLIENT_WAKE clients, apply OF clock defaults, attach
+ * the PM domain and finally call the driver's probe_new() or probe().
+ *
+ * Returns 0 on success or for devices that are not I2C clients, otherwise
+ * a negative errno; wakeup and PM domain setup are undone on failure.
+ */
+static int i2c_device_probe(struct device *dev)
+{
+	struct i2c_client	*client = i2c_verify_client(dev);
+	struct i2c_driver	*driver;
+	int status;
+
+	/* Nothing to probe for devices that are not I2C clients */
+	if (!client)
+		return 0;
+
+	driver = to_i2c_driver(dev->driver);
+
+	/* Look up an IRQ only if none was given and the driver allows it */
+	if (!client->irq && !driver->disable_i2c_core_irq_mapping) {
+		int irq = -ENOENT;
+
+		if (client->flags & I2C_CLIENT_HOST_NOTIFY) {
+			dev_dbg(dev, "Using Host Notify IRQ\n");
+			irq = i2c_smbus_host_notify_to_irq(client);
+		} else if (dev->of_node) {
+			/* Prefer the interrupt named "irq", else index 0 */
+			irq = of_irq_get_byname(dev->of_node, "irq");
+			if (irq == -EINVAL || irq == -ENODATA)
+				irq = of_irq_get(dev->of_node, 0);
+		} else if (ACPI_COMPANION(dev)) {
+			irq = acpi_dev_gpio_irq_get(ACPI_COMPANION(dev), 0);
+		}
+		if (irq == -EPROBE_DEFER)
+			return irq;
+
+		/* Any other lookup failure is treated as "no IRQ" */
+		if (irq < 0)
+			irq = 0;
+
+		client->irq = irq;
+	}
+
+	/*
+	 * An I2C ID table is not mandatory, if and only if, a suitable Device
+	 * Tree match table entry is supplied for the probing device.
+	 */
+	if (!driver->id_table &&
+	    !i2c_of_match_device(dev->driver->of_match_table, client))
+		return -ENODEV;
+
+	if (client->flags & I2C_CLIENT_WAKE) {
+		int wakeirq = -ENOENT;
+
+		if (dev->of_node) {
+			wakeirq = of_irq_get_byname(dev->of_node, "wakeup");
+			if (wakeirq == -EPROBE_DEFER)
+				return wakeirq;
+		}
+
+		device_init_wakeup(&client->dev, true);
+
+		/*
+		 * Use a dedicated wake IRQ when a distinct "wakeup" interrupt
+		 * exists, otherwise fall back to the client IRQ if there is one.
+		 */
+		if (wakeirq > 0 && wakeirq != client->irq)
+			status = dev_pm_set_dedicated_wake_irq(dev, wakeirq);
+		else if (client->irq > 0)
+			status = dev_pm_set_wake_irq(dev, client->irq);
+		else
+			status = 0;
+
+		/* A wake IRQ setup failure is only warned about, not fatal */
+		if (status)
+			dev_warn(&client->dev, "failed to set up wakeup irq\n");
+	}
+
+	dev_dbg(dev, "probe\n");
+
+	status = of_clk_set_defaults(dev->of_node, false);
+	if (status < 0)
+		goto err_clear_wakeup_irq;
+
+	/* Only a deferral from the PM domain attach aborts the probe */
+	status = dev_pm_domain_attach(&client->dev, true);
+	if (status == -EPROBE_DEFER)
+		goto err_clear_wakeup_irq;
+
+	/*
+	 * When there are no more users of probe(),
+	 * rename probe_new to probe.
+	 */
+	if (driver->probe_new)
+		status = driver->probe_new(client);
+	else if (driver->probe)
+		status = driver->probe(client,
+				       i2c_match_id(driver->id_table, client));
+	else
+		status = -EINVAL;
+
+	if (status)
+		goto err_detach_pm_domain;
+
+	return 0;
+
+	/* Unwind in reverse order of setup */
+err_detach_pm_domain:
+	dev_pm_domain_detach(&client->dev, true);
+err_clear_wakeup_irq:
+	dev_pm_clear_wake_irq(&client->dev);
+	device_init_wakeup(&client->dev, false);
+	return status;
+}
+
+/*
+ * Bus remove callback: call the driver's remove() if it has one, then
+ * detach the PM domain and tear down the wakeup configuration.  Returns the
+ * driver's remove() status, or 0 when there is no client, no bound driver
+ * or no remove() method.
+ */
+static int i2c_device_remove(struct device *dev)
+{
+	struct i2c_client *client = i2c_verify_client(dev);
+	struct i2c_driver *drv;
+	int ret = 0;
+
+	if (!client)
+		return 0;
+	if (!dev->driver)
+		return 0;
+
+	drv = to_i2c_driver(dev->driver);
+	if (drv->remove) {
+		dev_dbg(dev, "remove\n");
+		ret = drv->remove(client);
+	}
+
+	/* Mirror the error path of the probe callback */
+	dev_pm_domain_detach(&client->dev, true);
+	dev_pm_clear_wake_irq(&client->dev);
+	device_init_wakeup(&client->dev, false);
+
+	return ret;
+}
+
+/*
+ * Bus shutdown callback: forward to the bound driver's shutdown() method,
+ * if the device is an I2C client with a driver that provides one.
+ */
+static void i2c_device_shutdown(struct device *dev)
+{
+	struct i2c_client *client = i2c_verify_client(dev);
+	struct i2c_driver *drv;
+
+	if (!client)
+		return;
+	if (!dev->driver)
+		return;
+
+	drv = to_i2c_driver(dev->driver);
+	if (!drv->shutdown)
+		return;
+
+	drv->shutdown(client);
+}
+
+/* Device release callback for I2C clients: free the enclosing i2c_client */
+static void i2c_client_dev_release(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	kfree(client);
+}
+
+/*
+ * sysfs "name" attribute, shared by clients and adapters: prints the name
+ * of the i2c_client when @dev is of i2c_client_type, otherwise the name of
+ * the i2c_adapter.
+ */
+static ssize_t
+show_name(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	const char *name;
+
+	if (dev->type == &i2c_client_type)
+		name = to_i2c_client(dev)->name;
+	else
+		name = to_i2c_adapter(dev)->name;
+
+	return sprintf(buf, "%s\n", name);
+}
+static DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
+
+/*
+ * sysfs "modalias" attribute of an I2C client: the ACPI modalias when
+ * available, otherwise I2C_MODULE_PREFIX followed by the client name.
+ */
+static ssize_t
+show_modalias(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int len = acpi_device_modalias(dev, buf, PAGE_SIZE - 1);
+
+	/* -ENODEV is the only result that asks for the I2C fallback */
+	if (len == -ENODEV)
+		return sprintf(buf, "%s%s\n", I2C_MODULE_PREFIX, client->name);
+
+	return len;
+}
+static DEVICE_ATTR(modalias, S_IRUGO, show_modalias, NULL);
+
+/* sysfs attributes of I2C client devices; grouped for i2c_client_type */
+static struct attribute *i2c_dev_attrs[] = {
+	&dev_attr_name.attr,
+	/* modalias helps coldplug:  modprobe $(cat .../modalias) */
+	&dev_attr_modalias.attr,
+	NULL
+};
+ATTRIBUTE_GROUPS(i2c_dev);
+
+/*
+ * The "i2c" bus type: hooks the match/probe/remove/shutdown callbacks
+ * defined above into the driver model.
+ */
+struct bus_type i2c_bus_type = {
+	.name		= "i2c",
+	.match		= i2c_device_match,
+	.probe		= i2c_device_probe,
+	.remove		= i2c_device_remove,
+	.shutdown	= i2c_device_shutdown,
+};
+EXPORT_SYMBOL_GPL(i2c_bus_type);
+
+/*
+ * Device type of I2C clients.  Comparing dev->type against this object is
+ * how i2c_verify_client() tells clients apart from other devices.
+ */
+struct device_type i2c_client_type = {
+	.groups		= i2c_dev_groups,
+	.uevent		= i2c_device_uevent,
+	.release	= i2c_client_dev_release,
+};
+EXPORT_SYMBOL_GPL(i2c_client_type);
+
+
+/**
+ * i2c_verify_client - return parameter as i2c_client, or NULL
+ * @dev: device, probably from some driver model iterator
+ *
+ * Driver model iterators such as @device_for_each_child() hand out devices
+ * of any kind.  This helper checks the device type first, so that a
+ * non-I2C device is never treated as an i2c_client by mistake.
+ */
+struct i2c_client *i2c_verify_client(struct device *dev)
+{
+	if (dev->type != &i2c_client_type)
+		return NULL;
+
+	return to_i2c_client(dev);
+}
+EXPORT_SYMBOL(i2c_verify_client);
+
+
+/*
+ * Fold the addressing-related client flags into the address so that, for
+ * example, a ten-bit and a seven-bit client with the same numeric address
+ * yield different values.
+ */
+static unsigned short i2c_encode_flags_to_addr(struct i2c_client *client)
+{
+	unsigned short encoded = client->addr;
+	unsigned short flags = client->flags;
+
+	/* Arbitrary offsets, chosen only to keep the address spaces apart */
+	if (flags & I2C_CLIENT_TEN)
+		encoded |= I2C_ADDR_OFFSET_TEN_BIT;
+	if (flags & I2C_CLIENT_SLAVE)
+		encoded |= I2C_ADDR_OFFSET_SLAVE;
+
+	return encoded;
+}
+
+/*
+ * Permissive address validity check: only the numeric range is verified
+ * (plus the general call address for 7-bit clients); I2C address map
+ * constraints are purposely not enforced.
+ *
+ * Returns 0 for an acceptable address, -EINVAL otherwise.
+ */
+int i2c_check_addr_validity(unsigned addr, unsigned short flags)
+{
+	/* 10-bit address: every value that fits in 10 bits is valid */
+	if (flags & I2C_CLIENT_TEN)
+		return addr <= 0x3ff ? 0 : -EINVAL;
+
+	/* 7-bit address: reject the general call address and overflow */
+	if (addr == 0x00 || addr > 0x7f)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Strict address validity check, used when probing: a device sitting on a
+ * reserved address must not be probed.  7-bit addressing is assumed; 10-bit
+ * devices are rare and should be enumerated explicitly.
+ *
+ * Returns 0 for a usable address, -EINVAL for a reserved one.
+ */
+int i2c_check_7bit_addr_validity_strict(unsigned short addr)
+{
+	/*
+	 * Reserved addresses per I2C specification:
+	 *  0x00       General call address / START byte
+	 *  0x01       CBUS address
+	 *  0x02       Reserved for different bus format
+	 *  0x03       Reserved for future purposes
+	 *  0x04-0x07  Hs-mode master code
+	 *  0x78-0x7b  10-bit slave addressing
+	 *  0x7c-0x7f  Reserved for future purposes
+	 */
+	const unsigned short first_usable = 0x08;
+	const unsigned short last_usable = 0x77;
+
+	if (addr >= first_usable && addr <= last_usable)
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ * device_for_each_child() callback: returns -EBUSY when @dev is an I2C
+ * client whose flag-encoded address equals the int pointed to by @addrp,
+ * and 0 otherwise.
+ */
+static int __i2c_check_addr_busy(struct device *dev, void *addrp)
+{
+	struct i2c_client *client = i2c_verify_client(dev);
+	int wanted = *(int *)addrp;
+
+	if (!client)
+		return 0;
+
+	return i2c_encode_flags_to_addr(client) == wanted ? -EBUSY : 0;
+}
+
+/*
+ * Walk up the mux tree: check the direct children of @adapter and of each
+ * parent adapter above it for a client using @addr.  Stops at the first
+ * non-zero result.
+ */
+static int i2c_check_mux_parents(struct i2c_adapter *adapter, int addr)
+{
+	int result;
+
+	do {
+		result = device_for_each_child(&adapter->dev, &addr,
+					       __i2c_check_addr_busy);
+		adapter = i2c_parent_is_i2c_adapter(adapter);
+	} while (!result && adapter);
+
+	return result;
+}
+
+/*
+ * Recurse down the mux tree: adapters are descended into, any other device
+ * is checked directly against the address pointed to by @addrp.
+ */
+static int i2c_check_mux_children(struct device *dev, void *addrp)
+{
+	if (dev->type != &i2c_adapter_type)
+		return __i2c_check_addr_busy(dev, addrp);
+
+	return device_for_each_child(dev, addrp, i2c_check_mux_children);
+}
+
+/*
+ * Check whether @addr (already flag-encoded) is in use anywhere it could
+ * conflict with a new client on @adapter: first on the parent adapters up
+ * the mux tree, then on @adapter itself and everything below it.
+ * Returns 0 when the address is free, otherwise the non-zero result of the
+ * first check that failed.
+ */
+static int i2c_check_addr_busy(struct i2c_adapter *adapter, int addr)
+{
+	struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter);
+
+	if (parent) {
+		int busy = i2c_check_mux_parents(parent, addr);
+
+		if (busy)
+			return busy;
+	}
+
+	return device_for_each_child(&adapter->dev, &addr,
+				     i2c_check_mux_children);
+}
+
+/**
+ * i2c_adapter_lock_bus - Get exclusive access to an I2C bus segment
+ * @adapter: Target I2C bus segment
+ * @flags: I2C_LOCK_ROOT_ADAPTER locks the root i2c adapter, I2C_LOCK_SEGMENT
+ *	locks only this branch in the adapter tree
+ *
+ * This default implementation does not look at @flags: it always takes
+ * @adapter's own bus_lock.
+ */
+static void i2c_adapter_lock_bus(struct i2c_adapter *adapter,
+				 unsigned int flags)
+{
+	rt_mutex_lock(&adapter->bus_lock);
+}
+
+/**
+ * i2c_adapter_trylock_bus - Try to get exclusive access to an I2C bus segment
+ * @adapter: Target I2C bus segment
+ * @flags: I2C_LOCK_ROOT_ADAPTER trylocks the root i2c adapter, I2C_LOCK_SEGMENT
+ *	trylocks only this branch in the adapter tree
+ *
+ * This default implementation does not look at @flags: it always tries
+ * @adapter's own bus_lock and returns the result of rt_mutex_trylock().
+ */
+static int i2c_adapter_trylock_bus(struct i2c_adapter *adapter,
+				   unsigned int flags)
+{
+	return rt_mutex_trylock(&adapter->bus_lock);
+}
+
+/**
+ * i2c_adapter_unlock_bus - Release exclusive access to an I2C bus segment
+ * @adapter: Target I2C bus segment
+ * @flags: I2C_LOCK_ROOT_ADAPTER unlocks the root i2c adapter, I2C_LOCK_SEGMENT
+ *	unlocks only this branch in the adapter tree
+ *
+ * This default implementation does not look at @flags: it always releases
+ * @adapter's own bus_lock.
+ */
+static void i2c_adapter_unlock_bus(struct i2c_adapter *adapter,
+				   unsigned int flags)
+{
+	rt_mutex_unlock(&adapter->bus_lock);
+}
+
+/*
+ * Name the client device: "i2c-<acpi name>" when it has an ACPI companion,
+ * otherwise "<adapter id>-<flag-encoded address as 4 hex digits>".
+ */
+static void i2c_dev_set_name(struct i2c_adapter *adap,
+			     struct i2c_client *client)
+{
+	struct device *dev = &client->dev;
+	struct acpi_device *adev = ACPI_COMPANION(dev);
+
+	if (adev)
+		dev_set_name(dev, "i2c-%s", acpi_dev_name(adev));
+	else
+		dev_set_name(dev, "%d-%04x", i2c_adapter_id(adap),
+			     i2c_encode_flags_to_addr(client));
+}
+
+/*
+ * Return the IRQ number of the first IORESOURCE_IRQ entry in @resources.
+ *
+ * If that entry carries trigger bits (IORESOURCE_BITS), they are applied to
+ * the interrupt's irq_data first; when no irq_data exists for it, 0 is
+ * returned.  0 is also returned when there is no IRQ resource at all.
+ */
+static int i2c_dev_irq_from_resources(const struct resource *resources,
+				      unsigned int num_resources)
+{
+	int i;
+
+	for (i = 0; i < num_resources; i++) {
+		const struct resource *res = &resources[i];
+		struct irq_data *irqd;
+
+		if (resource_type(res) != IORESOURCE_IRQ)
+			continue;
+
+		/* No trigger information: the number alone is enough */
+		if (!(res->flags & IORESOURCE_BITS))
+			return res->start;
+
+		irqd = irq_get_irq_data(res->start);
+		if (!irqd)
+			return 0;
+
+		irqd_set_trigger_type(irqd, res->flags & IORESOURCE_BITS);
+		return res->start;
+	}
+
+	return 0;
+}
+
+/**
+ * i2c_new_device - instantiate an i2c device
+ * @adap: the adapter managing the device
+ * @info: describes one I2C device; bus_num is ignored
+ * Context: can sleep
+ *
+ * Create an i2c device. Binding is handled through driver model
+ * probe()/remove() methods.  A driver may be bound to this device when we
+ * return from this function, or any later moment (e.g. maybe hotplugging will
+ * load the driver module).  This call is not appropriate for use by mainboard
+ * initialization logic, which usually runs during an arch_initcall() long
+ * before any i2c_adapter could exist.
+ *
+ * This returns the new i2c client, which may be saved for later use with
+ * i2c_unregister_device(); or NULL to indicate an error.
+ */
+struct i2c_client *
+i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
+{
+	struct i2c_client	*client;
+	bool			need_put = false;
+	int			status;
+
+	client = kzalloc(sizeof *client, GFP_KERNEL);
+	if (!client)
+		return NULL;
+
+	client->adapter = adap;
+
+	client->dev.platform_data = info->platform_data;
+
+	if (info->archdata)
+		client->dev.archdata = *info->archdata;
+
+	client->flags = info->flags;
+	client->addr = info->addr;
+
+	/* An explicit IRQ wins; otherwise look for one in the resources */
+	client->irq = info->irq;
+	if (!client->irq)
+		client->irq = i2c_dev_irq_from_resources(info->resources,
+							 info->num_resources);
+
+	strlcpy(client->name, info->type, sizeof(client->name));
+
+	status = i2c_check_addr_validity(client->addr, client->flags);
+	if (status) {
+		dev_err(&adap->dev, "Invalid %d-bit I2C address 0x%02hx\n",
+			client->flags & I2C_CLIENT_TEN ? 10 : 7, client->addr);
+		goto out_err_silent;
+	}
+
+	/* Check that the address is not already in use */
+	status = i2c_check_addr_busy(adap, i2c_encode_flags_to_addr(client));
+	if (status)
+		goto out_err;
+
+	client->dev.parent = &client->adapter->dev;
+	client->dev.bus = &i2c_bus_type;
+	client->dev.type = &i2c_client_type;
+	client->dev.of_node = info->of_node;
+	client->dev.fwnode = info->fwnode;
+
+	i2c_dev_set_name(adap, client);
+
+	if (info->properties) {
+		status = device_add_properties(&client->dev, info->properties);
+		if (status) {
+			dev_err(&adap->dev,
+				"Failed to add properties to client %s: %d\n",
+				client->name, status);
+			goto out_err;
+		}
+	}
+
+	status = device_register(&client->dev);
+	if (status)
+		goto out_free_props;
+
+	dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n",
+		client->name, dev_name(&client->dev));
+
+	return client;
+
+out_free_props:
+	if (info->properties)
+		device_remove_properties(&client->dev);
+	/*
+	 * device_register() has initialized the embedded struct device, so
+	 * the client must not be kfree()d directly any more, even though
+	 * registration failed.  Dropping the reference lets the driver core
+	 * clean up and free the client via i2c_client_dev_release().
+	 */
+	need_put = true;
+out_err:
+	dev_err(&adap->dev,
+		"Failed to register i2c client %s at 0x%02x (%d)\n",
+		client->name, client->addr, status);
+out_err_silent:
+	if (need_put)
+		put_device(&client->dev);
+	else
+		kfree(client);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(i2c_new_device);
+
+
+/**
+ * i2c_unregister_device - reverse effect of i2c_new_device()
+ * @client: value returned from i2c_new_device()
+ * Context: can sleep
+ *
+ * Clears the OF_POPULATED flag of the client's OF node and the enumerated
+ * state of its ACPI companion, where present, before unregistering the
+ * device.
+ */
+void i2c_unregister_device(struct i2c_client *client)
+{
+	struct device *dev = &client->dev;
+
+	if (dev->of_node)
+		of_node_clear_flag(dev->of_node, OF_POPULATED);
+
+	if (ACPI_COMPANION(dev))
+		acpi_device_clear_enumerated(ACPI_COMPANION(dev));
+
+	device_unregister(dev);
+}
+EXPORT_SYMBOL_GPL(i2c_unregister_device);
+
+
+/* ID table of the dummy driver; "dummy" is the type used by i2c_new_dummy() */
+static const struct i2c_device_id dummy_id[] = {
+	{ "dummy", 0 },
+	{ },
+};
+
+/* The dummy driver has nothing to set up: probing always succeeds */
+static int dummy_probe(struct i2c_client *client,
+		       const struct i2c_device_id *id)
+{
+	return 0;
+}
+
+/* Nothing to tear down either: removal always succeeds */
+static int dummy_remove(struct i2c_client *client)
+{
+	return 0;
+}
+
+/* Placeholder driver whose probe and remove callbacks do nothing */
+static struct i2c_driver dummy_driver = {
+	.driver.name	= "dummy",
+	.probe		= dummy_probe,
+	.remove		= dummy_remove,
+	.id_table	= dummy_id,
+};
+
+/**
+ * i2c_new_dummy - return a new i2c device bound to a dummy driver
+ * @adapter: the adapter managing the device
+ * @address: seven bit address to be used
+ * Context: can sleep
+ *
+ * The returned I2C client is of type "dummy" and is meant for chips that
+ * occupy more than one address, such as various EEPROMS (24c04 and 24c08
+ * models, for instance).
+ *
+ * Dummy devices serve two purposes.  Most I2C and SMBus calls other than
+ * i2c_transfer() require a client handle, and the dummy provides one.  It
+ * also keeps the given address from being bound to a different driver.
+ *
+ * This returns the new i2c client, which should be saved for later use with
+ * i2c_unregister_device(); or NULL to indicate an error.
+ */
+struct i2c_client *i2c_new_dummy(struct i2c_adapter *adapter, u16 address)
+{
+	struct i2c_board_info dummy_info = {
+		I2C_BOARD_INFO("dummy", address),
+	};
+	struct i2c_client *dummy;
+
+	dummy = i2c_new_device(adapter, &dummy_info);
+
+	return dummy;
+}
+EXPORT_SYMBOL_GPL(i2c_new_dummy);
+
+/**
+ * i2c_new_secondary_device - Helper to get the instantiated secondary address
+ * and create the associated device
+ * @client: Handle to the primary client
+ * @name: Handle to specify which secondary address to get
+ * @default_addr: Used as a fallback if no secondary address was specified
+ * Context: can sleep
+ *
+ * A single component may consist of several I2C slaves.  The client driver
+ * binds to the master slave and needs dummy clients to talk to the others.
+ *
+ * This function creates and returns such a dummy client.  Its address is
+ * looked up in the platform firmware by slave name; when the firmware does
+ * not specify one, default_addr is used.
+ *
+ * On DT-based platforms the address is taken from the "reg" property cell
+ * whose "reg-names" value matches the slave name.
+ *
+ * This returns the new i2c client, which should be saved for later use with
+ * i2c_unregister_device(); or NULL to indicate an error.
+ */
+struct i2c_client *i2c_new_secondary_device(struct i2c_client *client,
+						const char *name,
+						u16 default_addr)
+{
+	struct i2c_adapter *adap = client->adapter;
+	struct device_node *node = client->dev.of_node;
+	u32 addr = default_addr;
+
+	if (node) {
+		int idx = of_property_match_string(node, "reg-names", name);
+
+		/* The "reg" cell at the matching index replaces the default */
+		if (idx >= 0)
+			of_property_read_u32_index(node, "reg", idx, &addr);
+	}
+
+	dev_dbg(&adap->dev, "Address for %s : 0x%x\n", name, addr);
+
+	return i2c_new_dummy(adap, addr);
+}
+EXPORT_SYMBOL_GPL(i2c_new_secondary_device);
+
+/* ------------------------------------------------------------------------- */
+
+/* I2C bus adapters -- one roots each I2C or SMBUS segment */
+
+/*
+ * Device release callback for adapters: signal the adapter's dev_released
+ * completion.  The adapter structure itself is not freed here.
+ */
+static void i2c_adapter_dev_release(struct device *dev)
+{
+	complete(&to_i2c_adapter(dev)->dev_released);
+}
+
+/*
+ * Return how many I2C adapters sit above @adapter in the adapter tree (0
+ * for a root adapter).  Warns once if the depth reaches
+ * MAX_LOCKDEP_SUBCLASSES.
+ */
+unsigned int i2c_adapter_depth(struct i2c_adapter *adapter)
+{
+	struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter);
+	unsigned int levels = 0;
+
+	while (parent) {
+		levels++;
+		parent = i2c_parent_is_i2c_adapter(parent);
+	}
+
+	WARN_ONCE(levels >= MAX_LOCKDEP_SUBCLASSES,
+		  "adapter depth exceeds lockdep subclass limit\n");
+
+	return levels;
+}
+EXPORT_SYMBOL_GPL(i2c_adapter_depth);
+
+/*
+ * Let users instantiate I2C devices through sysfs. This can be used when
+ * platform initialization code doesn't contain the proper data for
+ * whatever reason. Also useful for drivers that do device detection and
+ * detection fails, either because the device uses an unexpected address,
+ * or this is a compatible device with different ID register values.
+ *
+ * Parameter checking may look overzealous, but we really don't want
+ * the user to provide incorrect parameters.
+ */
+/*
+ * Store handler of the adapter's "new_device" attribute.
+ *
+ * @buf must hold a device name, a single space and an address parsed with
+ * "%hi", optionally followed by a newline.  The TEN_BIT and SLAVE offsets
+ * in the address are translated back into client flags.  On success the new
+ * client is appended to adap->userspace_clients and @count is returned;
+ * every failure yields -EINVAL.
+ */
+static ssize_t
+i2c_sysfs_new_device(struct device *dev, struct device_attribute *attr,
+		     const char *buf, size_t count)
+{
+	struct i2c_adapter *adap = to_i2c_adapter(dev);
+	struct i2c_board_info info;
+	struct i2c_client *client;
+	char *blank, end;
+	int res;
+
+	memset(&info, 0, sizeof(struct i2c_board_info));
+
+	/* The device name is everything up to the first space */
+	blank = strchr(buf, ' ');
+	if (!blank) {
+		dev_err(dev, "%s: Missing parameters\n", "new_device");
+		return -EINVAL;
+	}
+	if (blank - buf > I2C_NAME_SIZE - 1) {
+		dev_err(dev, "%s: Invalid device name\n", "new_device");
+		return -EINVAL;
+	}
+	/* info was zeroed above, so the copied name stays NUL-terminated */
+	memcpy(info.type, buf, blank - buf);
+
+	/* Parse remaining parameters, reject extra parameters */
+	res = sscanf(++blank, "%hi%c", &info.addr, &end);
+	if (res < 1) {
+		dev_err(dev, "%s: Can't parse I2C address\n", "new_device");
+		return -EINVAL;
+	}
+	/* Only a trailing newline may follow the address */
+	if (res > 1  && end != '\n') {
+		dev_err(dev, "%s: Extra parameters\n", "new_device");
+		return -EINVAL;
+	}
+
+	/* Strip the offset bits from the address and turn them into flags */
+	if ((info.addr & I2C_ADDR_OFFSET_TEN_BIT) == I2C_ADDR_OFFSET_TEN_BIT) {
+		info.addr &= ~I2C_ADDR_OFFSET_TEN_BIT;
+		info.flags |= I2C_CLIENT_TEN;
+	}
+
+	if (info.addr & I2C_ADDR_OFFSET_SLAVE) {
+		info.addr &= ~I2C_ADDR_OFFSET_SLAVE;
+		info.flags |= I2C_CLIENT_SLAVE;
+	}
+
+	client = i2c_new_device(adap, &info);
+	if (!client)
+		return -EINVAL;
+
+	/* Keep track of the added device */
+	mutex_lock(&adap->userspace_clients_lock);
+	list_add_tail(&client->detected, &adap->userspace_clients);
+	mutex_unlock(&adap->userspace_clients_lock);
+	dev_info(dev, "%s: Instantiated device %s at 0x%02hx\n", "new_device",
+		 info.type, info.addr);
+
+	return count;
+}
+static DEVICE_ATTR(new_device, S_IWUSR, NULL, i2c_sysfs_new_device);
+
+/*
+ * And of course let the users delete the devices they instantiated, if
+ * they got it wrong. This interface can only be used to delete devices
+ * instantiated by i2c_sysfs_new_device above. This guarantees that we
+ * don't delete devices to which some kernel code still has references.
+ *
+ * Parameter checking may look overzealous, but we really don't want
+ * the user to delete the wrong device.
+ */
+/*
+ * Store handler of the adapter's "delete_device" attribute.
+ *
+ * @buf must hold an address parsed with "%hi", optionally followed by a
+ * newline.  The address is compared against the flag-encoded address of the
+ * clients on adap->userspace_clients; the first match is removed from the
+ * list and unregistered.  Returns @count on success, -EINVAL on a parse
+ * error and -ENOENT when no such client is on the list.
+ */
+static ssize_t
+i2c_sysfs_delete_device(struct device *dev, struct device_attribute *attr,
+			const char *buf, size_t count)
+{
+	struct i2c_adapter *adap = to_i2c_adapter(dev);
+	struct i2c_client *client, *next;
+	unsigned short addr;
+	char end;
+	int res;
+
+	/* Parse parameters, reject extra parameters */
+	res = sscanf(buf, "%hi%c", &addr, &end);
+	if (res < 1) {
+		dev_err(dev, "%s: Can't parse I2C address\n", "delete_device");
+		return -EINVAL;
+	}
+	/* Only a trailing newline may follow the address */
+	if (res > 1  && end != '\n') {
+		dev_err(dev, "%s: Extra parameters\n", "delete_device");
+		return -EINVAL;
+	}
+
+	/* Make sure the device was added through sysfs */
+	res = -ENOENT;
+	/* The adapter depth is passed as the lockdep subclass of the lock */
+	mutex_lock_nested(&adap->userspace_clients_lock,
+			  i2c_adapter_depth(adap));
+	list_for_each_entry_safe(client, next, &adap->userspace_clients,
+				 detected) {
+		if (i2c_encode_flags_to_addr(client) == addr) {
+			dev_info(dev, "%s: Deleting device %s at 0x%02hx\n",
+				 "delete_device", client->name, client->addr);
+
+			list_del(&client->detected);
+			i2c_unregister_device(client);
+			res = count;
+			break;
+		}
+	}
+	mutex_unlock(&adap->userspace_clients_lock);
+
+	if (res < 0)
+		dev_err(dev, "%s: Can't find device in list\n",
+			"delete_device");
+	return res;
+}
+static DEVICE_ATTR_IGNORE_LOCKDEP(delete_device, S_IWUSR, NULL,
+				   i2c_sysfs_delete_device);
+
+/* sysfs attributes of I2C adapter devices; grouped for i2c_adapter_type */
+static struct attribute *i2c_adapter_attrs[] = {
+	&dev_attr_name.attr,
+	&dev_attr_new_device.attr,
+	&dev_attr_delete_device.attr,
+	NULL
+};
+ATTRIBUTE_GROUPS(i2c_adapter);
+
+/*
+ * Device type of I2C adapters.  Comparing dev->type against this object is
+ * how i2c_verify_adapter() tells adapters apart from other devices.
+ */
+struct device_type i2c_adapter_type = {
+	.groups		= i2c_adapter_groups,
+	.release	= i2c_adapter_dev_release,
+};
+EXPORT_SYMBOL_GPL(i2c_adapter_type);
+
+/**
+ * i2c_verify_adapter - return parameter as i2c_adapter or NULL
+ * @dev: device, probably from some driver model iterator
+ *
+ * Driver model iterators such as @device_for_each_child() hand out devices
+ * of any kind.  This helper checks the device type first, so that a
+ * non-I2C device is never treated as an i2c_adapter by mistake.
+ */
+struct i2c_adapter *i2c_verify_adapter(struct device *dev)
+{
+	if (dev->type != &i2c_adapter_type)
+		return NULL;
+
+	return to_i2c_adapter(dev);
+}
+EXPORT_SYMBOL(i2c_verify_adapter);
+
+#ifdef CONFIG_I2C_COMPAT
+static struct class_compat *i2c_adapter_compat_class;
+#endif
+
+/*
+ * Instantiate every statically declared board device whose bus number
+ * equals adapter->nr.  A device that cannot be created is logged and
+ * skipped.  The board list is walked under the read side of
+ * __i2c_board_lock.
+ */
+static void i2c_scan_static_board_info(struct i2c_adapter *adapter)
+{
+	struct i2c_devinfo *entry;
+
+	down_read(&__i2c_board_lock);
+	list_for_each_entry(entry, &__i2c_board_list, list) {
+		if (entry->busnum != adapter->nr)
+			continue;
+
+		if (i2c_new_device(adapter, &entry->board_info))
+			continue;
+
+		dev_err(&adapter->dev,
+			"Can't create device at 0x%02x\n",
+			entry->board_info.addr);
+	}
+	up_read(&__i2c_board_lock);
+}
+
+/*
+ * Introduce adapter @adap to @driver: run device detection and, for legacy
+ * drivers, the deprecated attach_adapter() method.  Always returns 0.
+ */
+static int i2c_do_add_adapter(struct i2c_driver *driver,
+			      struct i2c_adapter *adap)
+{
+	/* Detect supported devices on that bus, and instantiate them */
+	i2c_detect(adap, driver);
+
+	if (!driver->attach_adapter)
+		return 0;
+
+	/* Let legacy drivers scan this bus for matching devices */
+	dev_warn(&adap->dev, "%s: attach_adapter method is deprecated\n",
+		 driver->driver.name);
+	dev_warn(&adap->dev,
+		 "Please use another way to instantiate your i2c_client\n");
+
+	/* The return code is deliberately ignored */
+	driver->attach_adapter(adap);
+
+	return 0;
+}
+
+/*
+ * Per-driver callback: @d is converted to its i2c_driver and @data is
+ * passed on as the adapter to i2c_do_add_adapter().
+ */
+static int __process_new_adapter(struct device_driver *d, void *data)
+{
+	struct i2c_driver *driver = to_i2c_driver(d);
+	struct i2c_adapter *adap = data;
+
+	return i2c_do_add_adapter(driver, adap);
+}
+
+/* Lock operations built from the bus_lock based helpers defined above */
+static const struct i2c_lock_operations i2c_adapter_lock_ops = {
+	.lock_bus =    i2c_adapter_lock_bus,
+	.trylock_bus = i2c_adapter_trylock_bus,
+	.unlock_bus =  i2c_adapter_unlock_bus,
+};
+
+/*
+ * Dispose of the mapping of every hardware IRQ number below
+ * I2C_ADDR_7BITS_COUNT in the adapter's Host Notify domain, then remove
+ * the domain.  Does nothing when the adapter has no such domain.
+ */
+static void i2c_host_notify_irq_teardown(struct i2c_adapter *adap)
+{
+	struct irq_domain *domain = adap->host_notify_domain;
+	irq_hw_number_t hwirq = 0;
+
+	if (!domain)
+		return;
+
+	while (hwirq < I2C_ADDR_7BITS_COUNT) {
+		irq_dispose_mapping(irq_find_mapping(domain, hwirq));
+		hwirq++;
+	}
+
+	irq_domain_remove(domain);
+	adap->host_notify_domain = NULL;
+}
+
+/*
+ * irq_domain .map callback for Host Notify: give @virq the dummy irq chip
+ * and the simple flow handler.  Always returns 0.
+ */
+static int i2c_host_notify_irq_map(struct irq_domain *h,
+					  unsigned int virq,
+					  irq_hw_number_t hw_irq_num)
+{
+	irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq);
+
+	return 0;
+}
+
+/* Domain operations of the Host Notify IRQ domain; only .map is provided */
+static const struct irq_domain_ops i2c_host_notify_irq_ops = {
+	.map = i2c_host_notify_irq_map,
+};
+
+/*
+ * Create the linear IRQ domain used to deliver SMBus Host Notify events.
+ * Adapters lacking I2C_FUNC_SMBUS_HOST_NOTIFY are left without a domain.
+ * Returns 0 on success (or when nothing had to be done), -ENOMEM otherwise.
+ */
+static int i2c_setup_host_notify_irq_domain(struct i2c_adapter *adap)
+{
+	struct irq_domain *notify_domain;
+
+	if (!i2c_check_functionality(adap, I2C_FUNC_SMBUS_HOST_NOTIFY))
+		return 0;
+
+	notify_domain = irq_domain_create_linear(adap->dev.fwnode,
+						 I2C_ADDR_7BITS_COUNT,
+						 &i2c_host_notify_irq_ops, adap);
+	if (notify_domain) {
+		adap->host_notify_domain = notify_domain;
+		return 0;
+	}
+
+	return -ENOMEM;
+}
+
+/**
+ * i2c_handle_smbus_host_notify - Forward a Host Notify event to the correct
+ * I2C client.
+ * @adap: the adapter
+ * @addr: the I2C address of the notifying device
+ * Context: can't sleep
+ *
+ * Helper for an I2C bus driver's interrupt handler: looks up the Host Notify
+ * IRQ mapped for @addr and runs its handler.
+ *
+ * Returns 0 on success, -EINVAL if @adap is NULL, -ENXIO if no IRQ is mapped
+ * for @addr.
+ */
+int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr)
+{
+	int virq;
+
+	if (!adap)
+		return -EINVAL;
+
+	virq = irq_find_mapping(adap->host_notify_domain, addr);
+	if (virq > 0) {
+		generic_handle_irq(virq);
+		return 0;
+	}
+
+	return -ENXIO;
+}
+EXPORT_SYMBOL_GPL(i2c_handle_smbus_host_notify);
+
+/*
+ * Common tail of adapter registration.  The caller has already reserved
+ * adap->nr in i2c_adapter_idr; on any failure that reservation is dropped
+ * again before returning.
+ *
+ * Validates the adapter, initialises its locks and defaults, creates the
+ * Host Notify IRQ domain, registers the struct device, instantiates the
+ * pre-declared (OF / ACPI / board-info) clients and finally announces the
+ * new bus to every registered i2c driver.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int i2c_register_adapter(struct i2c_adapter *adap)
+{
+	int res = -EINVAL;
+
+	/* Can't register until after driver model init */
+	if (WARN_ON(!is_registered)) {
+		res = -EAGAIN;
+		goto out_list;
+	}
+
+	/* Sanity checks */
+	if (WARN(!adap->name[0], "i2c adapter has no name"))
+		goto out_list;
+
+	if (!adap->algo) {
+		pr_err("adapter '%s': no algo supplied!\n", adap->name);
+		goto out_list;
+	}
+
+	/* Fall back to the core's locking callbacks */
+	if (!adap->lock_ops)
+		adap->lock_ops = &i2c_adapter_lock_ops;
+
+	rt_mutex_init(&adap->bus_lock);
+	rt_mutex_init(&adap->mux_lock);
+	mutex_init(&adap->userspace_clients_lock);
+	INIT_LIST_HEAD(&adap->userspace_clients);
+
+	/* Set default timeout to 1 second if not already set */
+	if (adap->timeout == 0)
+		adap->timeout = HZ;
+
+	/* register soft irqs for Host Notify */
+	res = i2c_setup_host_notify_irq_domain(adap);
+	if (res) {
+		pr_err("adapter '%s': can't create Host Notify IRQs (%d)\n",
+		       adap->name, res);
+		goto out_list;
+	}
+
+	dev_set_name(&adap->dev, "i2c-%d", adap->nr);
+	adap->dev.bus = &i2c_bus_type;
+	adap->dev.type = &i2c_adapter_type;
+	res = device_register(&adap->dev);
+	if (res) {
+		pr_err("adapter '%s': can't register device (%d)\n", adap->name, res);
+		/*
+		 * The Host Notify domain was created just above and nobody
+		 * else will release it once registration has failed.
+		 */
+		i2c_host_notify_irq_teardown(adap);
+		goto out_list;
+	}
+
+	dev_dbg(&adap->dev, "adapter [%s] registered\n", adap->name);
+
+	pm_runtime_no_callbacks(&adap->dev);
+	pm_suspend_ignore_children(&adap->dev, true);
+	pm_runtime_enable(&adap->dev);
+
+#ifdef CONFIG_I2C_COMPAT
+	/* A missing compatibility link is not fatal, only reported */
+	res = class_compat_create_link(i2c_adapter_compat_class, &adap->dev,
+				       adap->dev.parent);
+	if (res)
+		dev_warn(&adap->dev,
+			 "Failed to create compatibility class link\n");
+#endif
+
+	i2c_init_recovery(adap);
+
+	/* create pre-declared device nodes */
+	of_i2c_register_devices(adap);
+	i2c_acpi_register_devices(adap);
+	i2c_acpi_install_space_handler(adap);
+
+	/* Board info is only scanned for statically numbered buses */
+	if (adap->nr < __i2c_first_dynamic_bus_num)
+		i2c_scan_static_board_info(adap);
+
+	/* Notify drivers */
+	mutex_lock(&core_lock);
+	bus_for_each_drv(&i2c_bus_type, NULL, adap, __process_new_adapter);
+	mutex_unlock(&core_lock);
+
+	return 0;
+
+out_list:
+	/* Give back the bus number reserved by the caller */
+	mutex_lock(&core_lock);
+	idr_remove(&i2c_adapter_idr, adap->nr);
+	mutex_unlock(&core_lock);
+	return res;
+}
+
+/**
+ * __i2c_add_numbered_adapter - i2c_add_numbered_adapter where nr is never -1
+ * @adap: the adapter to register (with adap->nr initialized)
+ * Context: can sleep
+ *
+ * Reserves exactly adap->nr in the adapter IDR and registers the adapter.
+ * An already occupied number is reported as -EBUSY.
+ *
+ * See i2c_add_numbered_adapter() for details.
+ */
+static int __i2c_add_numbered_adapter(struct i2c_adapter *adap)
+{
+	int id;
+
+	/* The [nr, nr + 1) range admits exactly one id: the requested one */
+	mutex_lock(&core_lock);
+	id = idr_alloc(&i2c_adapter_idr, adap, adap->nr, adap->nr + 1, GFP_KERNEL);
+	mutex_unlock(&core_lock);
+
+	if (WARN(id < 0, "couldn't get idr")) {
+		if (id == -ENOSPC)
+			return -EBUSY;
+		return id;
+	}
+
+	return i2c_register_adapter(adap);
+}
+
+/**
+ * i2c_add_adapter - declare i2c adapter, use dynamic bus number
+ * @adapter: the adapter to add
+ * Context: can sleep
+ *
+ * This routine is used to declare an I2C adapter when its bus number
+ * doesn't matter or when its bus number is specified by a dt alias.
+ * Examples of cases when the bus number doesn't matter: I2C adapters
+ * dynamically added by USB links or PCI plugin cards.
+ *
+ * When this returns zero, a new bus number was allocated and stored
+ * in adap->nr, and the specified adapter became available for clients.
+ * Otherwise, a negative errno value is returned.
+ */
+int i2c_add_adapter(struct i2c_adapter *adapter)
+{
+	struct device_node *np = adapter->dev.of_node;
+	int id;
+
+	/* A device-tree "i2c" alias, when present, pins the bus number */
+	if (np) {
+		id = of_alias_get_id(np, "i2c");
+		if (id >= 0) {
+			adapter->nr = id;
+			return __i2c_add_numbered_adapter(adapter);
+		}
+	}
+
+	/* Otherwise take the first free number in the dynamic range */
+	mutex_lock(&core_lock);
+	id = idr_alloc(&i2c_adapter_idr, adapter,
+		       __i2c_first_dynamic_bus_num, 0, GFP_KERNEL);
+	mutex_unlock(&core_lock);
+
+	if (WARN(id < 0, "couldn't get idr"))
+		return id;
+
+	adapter->nr = id;
+
+	return i2c_register_adapter(adapter);
+}
+EXPORT_SYMBOL(i2c_add_adapter);
+
+/**
+ * i2c_add_numbered_adapter - declare i2c adapter, use static bus number
+ * @adap: the adapter to register (with adap->nr initialized)
+ * Context: can sleep
+ *
+ * This routine is used to declare an I2C adapter when its bus number
+ * matters.  For example, use it for I2C adapters from system-on-chip CPUs,
+ * or otherwise built in to the system's mainboard, and where i2c_board_info
+ * is used to properly configure I2C devices.
+ *
+ * If the requested bus number is set to -1, then this function will behave
+ * identically to i2c_add_adapter, and will dynamically assign a bus number.
+ *
+ * If no devices have been pre-declared for this bus, then be sure to
+ * register the adapter before any dynamically allocated ones.  Otherwise
+ * the required bus ID may not be available.
+ *
+ * When this returns zero, the specified adapter became available for
+ * clients using the bus number provided in adap->nr.  Also, the table
+ * of I2C devices pre-declared using i2c_register_board_info() is scanned,
+ * and the appropriate driver model device nodes are created.  Otherwise, a
+ * negative errno value is returned.
+ */
+int i2c_add_numbered_adapter(struct i2c_adapter *adap)
+{
+	/* Any value other than -1 is a request for that exact bus id */
+	if (adap->nr != -1)
+		return __i2c_add_numbered_adapter(adap);
+
+	return i2c_add_adapter(adap);
+}
+EXPORT_SYMBOL_GPL(i2c_add_numbered_adapter);
+
+/*
+ * Unregister the clients of @driver that sit on @adapter and were created
+ * by the core itself through the driver's detect method.
+ */
+static void i2c_do_del_adapter(struct i2c_driver *driver,
+			      struct i2c_adapter *adapter)
+{
+	struct i2c_client *cur, *tmp;
+
+	list_for_each_entry_safe(cur, tmp, &driver->clients, detected) {
+		/* Detected on some other bus: leave it alone */
+		if (cur->adapter != adapter)
+			continue;
+
+		dev_dbg(&adapter->dev, "Removing %s at 0x%x\n",
+			cur->name, cur->addr);
+		list_del(&cur->detected);
+		i2c_unregister_device(cur);
+	}
+}
+
+/*
+ * device_for_each_child() callback, first pass: unregister every i2c client
+ * except those named "dummy".  Always returns 0 so the walk continues.
+ */
+static int __unregister_client(struct device *dev, void *dummy)
+{
+	struct i2c_client *client = i2c_verify_client(dev);
+
+	if (!client)
+		return 0;
+
+	if (strcmp(client->name, "dummy") != 0)
+		i2c_unregister_device(client);
+
+	return 0;
+}
+
+/*
+ * device_for_each_child() callback, second pass: unregister whatever i2c
+ * client is still left.  Always returns 0 so the walk continues.
+ */
+static int __unregister_dummy(struct device *dev, void *dummy)
+{
+	struct i2c_client *leftover = i2c_verify_client(dev);
+
+	if (!leftover)
+		return 0;
+
+	i2c_unregister_device(leftover);
+	return 0;
+}
+
+/* bus_for_each_drv() callback: @data is the adapter that is going away. */
+static int __process_removed_adapter(struct device_driver *d, void *data)
+{
+	struct i2c_driver *driver = to_i2c_driver(d);
+	struct i2c_adapter *adap = data;
+
+	i2c_do_del_adapter(driver, adap);
+
+	return 0;
+}
+
+/**
+ * i2c_del_adapter - unregister I2C adapter
+ * @adap: the adapter being unregistered
+ * Context: can sleep
+ *
+ * This unregisters an I2C adapter which was previously registered
+ * by @i2c_add_adapter or @i2c_add_numbered_adapter.
+ *
+ * If @adap is not the adapter currently stored under adap->nr in the
+ * adapter IDR, only a debug message is emitted and nothing is torn down.
+ */
+void i2c_del_adapter(struct i2c_adapter *adap)
+{
+	struct i2c_adapter *found;
+	struct i2c_client *client, *next;
+
+	/* First make sure that this adapter was ever added */
+	mutex_lock(&core_lock);
+	found = idr_find(&i2c_adapter_idr, adap->nr);
+	mutex_unlock(&core_lock);
+	if (found != adap) {
+		pr_debug("attempting to delete unregistered adapter [%s]\n", adap->name);
+		return;
+	}
+
+	i2c_acpi_remove_space_handler(adap);
+	/* Tell drivers about this removal */
+	mutex_lock(&core_lock);
+	bus_for_each_drv(&i2c_bus_type, NULL, adap,
+			       __process_removed_adapter);
+	mutex_unlock(&core_lock);
+
+	/* Remove devices instantiated from sysfs */
+	mutex_lock_nested(&adap->userspace_clients_lock,
+			  i2c_adapter_depth(adap));
+	list_for_each_entry_safe(client, next, &adap->userspace_clients,
+				 detected) {
+		dev_dbg(&adap->dev, "Removing %s at 0x%x\n", client->name,
+			client->addr);
+		list_del(&client->detected);
+		i2c_unregister_device(client);
+	}
+	mutex_unlock(&adap->userspace_clients_lock);
+
+	/* Detach any active clients. This can't fail, thus we do not
+	 * check the returned value. This is a two-pass process, because
+	 * we can't remove the dummy devices during the first pass: they
+	 * could have been instantiated by real devices wishing to clean
+	 * them up properly, so we give them a chance to do that first. */
+	device_for_each_child(&adap->dev, NULL, __unregister_client);
+	device_for_each_child(&adap->dev, NULL, __unregister_dummy);
+
+#ifdef CONFIG_I2C_COMPAT
+	class_compat_remove_link(i2c_adapter_compat_class, &adap->dev,
+				 adap->dev.parent);
+#endif
+
+	/* device name is gone after device_unregister */
+	dev_dbg(&adap->dev, "adapter [%s] unregistered\n", adap->name);
+
+	pm_runtime_disable(&adap->dev);
+
+	/* Dispose of the Host Notify IRQ mappings and their domain */
+	i2c_host_notify_irq_teardown(adap);
+
+	/* wait until all references to the device are gone
+	 *
+	 * FIXME: This is old code and should ideally be replaced by an
+	 * alternative which results in decoupling the lifetime of the struct
+	 * device from the i2c_adapter, like spi or netdev do. Any solution
+	 * should be thoroughly tested with DEBUG_KOBJECT_RELEASE enabled!
+	 */
+	init_completion(&adap->dev_released);
+	device_unregister(&adap->dev);
+	wait_for_completion(&adap->dev_released);
+
+	/* free bus id */
+	mutex_lock(&core_lock);
+	idr_remove(&i2c_adapter_idr, adap->nr);
+	mutex_unlock(&core_lock);
+
+	/* Clear the device structure in case this adapter is ever going to be
+	   added again */
+	memset(&adap->dev, 0, sizeof(adap->dev));
+}
+EXPORT_SYMBOL(i2c_del_adapter);
+
+/**
+ * i2c_parse_fw_timings - get I2C related timing parameters from firmware
+ * @dev: The device to scan for I2C timing properties
+ * @t: the i2c_timings struct to be filled with values
+ * @use_defaults: bool to use sane defaults derived from the I2C specification
+ *		  when properties are not found, otherwise use 0
+ *
+ * Reads the generic I2C timing properties of @dev into @t, which is zeroed
+ * first.  For a property that cannot be read, the field either stays 0
+ * (@use_defaults false, so that drivers can apply their own defaults later)
+ * or receives the maximum timing derived from the I2C specification for the
+ * bus frequency in effect (@use_defaults true).  The former is mainly meant
+ * to avoid regressions in existing drivers switching to this function; new
+ * drivers almost always should use the defaults.
+ */
+
+void i2c_parse_fw_timings(struct device *dev, struct i2c_timings *t, bool use_defaults)
+{
+	memset(t, 0, sizeof(*t));
+
+	/* Every read is attempted; the default only applies if it failed */
+	if (device_property_read_u32(dev, "clock-frequency", &t->bus_freq_hz) &&
+	    use_defaults)
+		t->bus_freq_hz = 100000;
+
+	if (device_property_read_u32(dev, "i2c-scl-rising-time-ns", &t->scl_rise_ns) &&
+	    use_defaults) {
+		if (t->bus_freq_hz > 400000)
+			t->scl_rise_ns = 120;
+		else if (t->bus_freq_hz > 100000)
+			t->scl_rise_ns = 300;
+		else
+			t->scl_rise_ns = 1000;
+	}
+
+	if (device_property_read_u32(dev, "i2c-scl-falling-time-ns", &t->scl_fall_ns) &&
+	    use_defaults)
+		t->scl_fall_ns = (t->bus_freq_hz > 400000) ? 120 : 300;
+
+	/* No default exists for the internal delay: it stays 0 if absent */
+	device_property_read_u32(dev, "i2c-scl-internal-delay-ns", &t->scl_int_delay_ns);
+
+	/* SDA falling time defaults to whatever SCL falling time ended up as */
+	if (device_property_read_u32(dev, "i2c-sda-falling-time-ns", &t->sda_fall_ns) &&
+	    use_defaults)
+		t->sda_fall_ns = t->scl_fall_ns;
+}
+EXPORT_SYMBOL_GPL(i2c_parse_fw_timings);
+
+/* ------------------------------------------------------------------------- */
+
+/*
+ * Call @fn with @data for each device on the i2c bus while holding
+ * core_lock.  Returns whatever bus_for_each_dev() returned.
+ */
+int i2c_for_each_dev(void *data, int (*fn)(struct device *, void *))
+{
+	int ret;
+
+	mutex_lock(&core_lock);
+	ret = bus_for_each_dev(&i2c_bus_type, NULL, data, fn);
+	mutex_unlock(&core_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i2c_for_each_dev);
+
+/*
+ * i2c_for_each_dev() callback: @data is the newly registered driver.  Only
+ * adapter devices are of interest; every other device is skipped.
+ */
+static int __process_new_driver(struct device *dev, void *data)
+{
+	if (dev->type == &i2c_adapter_type)
+		return i2c_do_add_adapter(data, to_i2c_adapter(dev));
+
+	return 0;
+}
+
+/*
+ * An i2c_driver is used with one or more i2c_client (device) nodes to access
+ * i2c slave chips, on a bus instance associated with some i2c_adapter.
+ *
+ * Registers @driver on the i2c bus on behalf of module @owner, then
+ * introduces it to every adapter that already exists.  Returns 0 on success,
+ * -EAGAIN if the i2c bus is not registered yet, or the error reported by
+ * driver_register().
+ */
+
+int i2c_register_driver(struct module *owner, struct i2c_driver *driver)
+{
+	int err;
+
+	/* Can't register until after driver model init */
+	if (WARN_ON(!is_registered))
+		return -EAGAIN;
+
+	/* add the driver to the list of i2c drivers in the driver core */
+	driver->driver.owner = owner;
+	driver->driver.bus = &i2c_bus_type;
+	INIT_LIST_HEAD(&driver->clients);
+
+	/* When registration returns, the driver core
+	 * will have called probe() for all matching-but-unbound devices.
+	 */
+	err = driver_register(&driver->driver);
+	if (!err) {
+		pr_debug("driver [%s] registered\n", driver->driver.name);
+
+		/* Walk the adapters that are already present */
+		i2c_for_each_dev(driver, __process_new_driver);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(i2c_register_driver);
+
+/*
+ * i2c_for_each_dev() callback: @data is the driver being removed.  Devices
+ * that are not adapters are skipped.  Always returns 0.
+ */
+static int __process_removed_driver(struct device *dev, void *data)
+{
+	if (dev->type != &i2c_adapter_type)
+		return 0;
+
+	i2c_do_del_adapter(data, to_i2c_adapter(dev));
+	return 0;
+}
+
+/**
+ * i2c_del_driver - unregister I2C driver
+ * @driver: the driver being unregistered
+ * Context: can sleep
+ *
+ * The clients the core created for this driver through detection are
+ * removed from every adapter before the driver itself is unregistered.
+ */
+void i2c_del_driver(struct i2c_driver *driver)
+{
+	/* Drop the detected clients of this driver, adapter by adapter */
+	i2c_for_each_dev(driver, __process_removed_driver);
+
+	driver_unregister(&driver->driver);
+	pr_debug("driver [%s] unregistered\n", driver->driver.name);
+}
+EXPORT_SYMBOL(i2c_del_driver);
+
+/* ------------------------------------------------------------------------- */
+
+/**
+ * i2c_use_client - increments the reference count of the i2c client structure
+ * @client: the client being referenced
+ *
+ * Each live reference to a client should be refcounted. The driver model does
+ * that automatically as part of driver binding, so that most drivers don't
+ * need to do this explicitly: they hold a reference until they're unbound
+ * from the device.
+ *
+ * Returns @client with its reference count incremented, or NULL when @client
+ * is NULL or no reference could be taken.
+ */
+struct i2c_client *i2c_use_client(struct i2c_client *client)
+{
+	if (!client)
+		return NULL;
+
+	if (!get_device(&client->dev))
+		return NULL;
+
+	return client;
+}
+EXPORT_SYMBOL(i2c_use_client);
+
+/**
+ * i2c_release_client - release a use of the i2c client structure
+ * @client: the client being no longer referenced
+ *
+ * Must be called when a user of a client is finished with it.  A NULL
+ * @client is accepted and ignored.
+ */
+void i2c_release_client(struct i2c_client *client)
+{
+	if (!client)
+		return;
+
+	put_device(&client->dev);
+}
+EXPORT_SYMBOL(i2c_release_client);
+
+/*
+ * Argument bundle that i2c_clients_command() hands to i2c_cmd() through
+ * device_for_each_child().
+ */
+struct i2c_cmd_arg {
+	unsigned	cmd;	/* passed as second argument of driver->command() */
+	void		*arg;	/* passed as third argument of driver->command() */
+};
+
+/*
+ * device_for_each_child() callback: forward the command carried in @_arg to
+ * the driver bound to @dev, provided @dev is an i2c client with a bound
+ * driver that implements ->command().  Always returns 0.
+ */
+static int i2c_cmd(struct device *dev, void *_arg)
+{
+	struct i2c_cmd_arg *req = _arg;
+	struct i2c_client *client = i2c_verify_client(dev);
+	struct i2c_driver *drv;
+
+	/* Not a client, or no driver bound to it */
+	if (!client)
+		return 0;
+	if (!client->dev.driver)
+		return 0;
+
+	drv = to_i2c_driver(client->dev.driver);
+	if (!drv->command)
+		return 0;
+
+	drv->command(client, req->cmd, req->arg);
+	return 0;
+}
+
+/*
+ * Send @cmd with @arg to the ->command() method of the driver of every
+ * client found among the children of @adap.
+ */
+void i2c_clients_command(struct i2c_adapter *adap, unsigned int cmd, void *arg)
+{
+	struct i2c_cmd_arg cmd_arg = {
+		.cmd = cmd,
+		.arg = arg,
+	};
+
+	device_for_each_child(&adap->dev, &cmd_arg, i2c_cmd);
+}
+EXPORT_SYMBOL(i2c_clients_command);
+
+/*
+ * Core initialisation: reserve the bus numbers claimed by device-tree
+ * aliases, register the i2c bus type, the optional compatibility class and
+ * the dummy driver, then hook up the OF / ACPI reconfiguration notifiers.
+ *
+ * Returns 0 on success or a negative errno, in which case everything that
+ * had been registered so far is unregistered again.
+ */
+static int __init i2c_init(void)
+{
+	int retval;
+
+	retval = of_alias_get_highest_id("i2c");
+
+	/* Dynamic bus numbers must start above the highest aliased id */
+	down_write(&__i2c_board_lock);
+	if (retval >= __i2c_first_dynamic_bus_num)
+		__i2c_first_dynamic_bus_num = retval + 1;
+	up_write(&__i2c_board_lock);
+
+	retval = bus_register(&i2c_bus_type);
+	if (retval)
+		return retval;
+
+	/* From here on adapters and drivers may be registered */
+	is_registered = true;
+
+#ifdef CONFIG_I2C_COMPAT
+	i2c_adapter_compat_class = class_compat_register("i2c-adapter");
+	if (!i2c_adapter_compat_class) {
+		retval = -ENOMEM;
+		goto bus_err;
+	}
+#endif
+	retval = i2c_add_driver(&dummy_driver);
+	if (retval)
+		goto class_err;
+
+	/* A failing notifier registration is warned about but not fatal */
+	if (IS_ENABLED(CONFIG_OF_DYNAMIC))
+		WARN_ON(of_reconfig_notifier_register(&i2c_of_notifier));
+	if (IS_ENABLED(CONFIG_ACPI))
+		WARN_ON(acpi_reconfig_notifier_register(&i2c_acpi_notifier));
+
+	return 0;
+
+class_err:
+#ifdef CONFIG_I2C_COMPAT
+	class_compat_unregister(i2c_adapter_compat_class);
+bus_err:
+#endif
+	is_registered = false;
+	bus_unregister(&i2c_bus_type);
+	return retval;
+}
+
+/*
+ * Core teardown: undo i2c_init() in reverse order, then wait for tracepoint
+ * users to finish.
+ */
+static void __exit i2c_exit(void)
+{
+	if (IS_ENABLED(CONFIG_ACPI))
+		WARN_ON(acpi_reconfig_notifier_unregister(&i2c_acpi_notifier));
+	if (IS_ENABLED(CONFIG_OF_DYNAMIC))
+		WARN_ON(of_reconfig_notifier_unregister(&i2c_of_notifier));
+	i2c_del_driver(&dummy_driver);
+#ifdef CONFIG_I2C_COMPAT
+	class_compat_unregister(i2c_adapter_compat_class);
+#endif
+	bus_unregister(&i2c_bus_type);
+	tracepoint_synchronize_unregister();
+}
+
+/* We must initialize early, because some subsystems register i2c drivers
+ * in subsys_initcall() code, but are linked (and initialized) before i2c.
+ */
+postcore_initcall(i2c_init);
+module_exit(i2c_exit);
+
+/* ----------------------------------------------------
+ * the functional interface to the i2c busses.
+ * ----------------------------------------------------
+ */
+
+/*
+ * Check if val is exceeding the quirk IFF quirk is non 0; a quirk of 0
+ * therefore never triggers.  Note that @quirk is evaluated twice.
+ */
+#define i2c_quirk_exceeded(val, quirk) ((quirk) && ((val) > (quirk)))
+
+/*
+ * Emit a rate-limited report that @msg violates an adapter quirk, with
+ * @err_msg naming the violated rule.  Always returns -EOPNOTSUPP.
+ */
+static int i2c_quirk_error(struct i2c_adapter *adap, struct i2c_msg *msg, char *err_msg)
+{
+	const char *direction = (msg->flags & I2C_M_RD) ? "read" : "write";
+
+	dev_err_ratelimited(&adap->dev, "adapter quirk: %s (addr 0x%04x, size %u, %s)\n",
+			    err_msg, msg->addr, msg->len, direction);
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Validate a transfer of @num messages against the quirks of @adap, which
+ * must have adap->quirks set.  Returns 0 if the transfer is acceptable,
+ * otherwise the (negative) result of i2c_quirk_error().
+ */
+static int i2c_check_for_quirks(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
+{
+	const struct i2c_adapter_quirks *q = adap->quirks;
+	int msg_limit = q->max_num_msgs;
+	bool comb_checked = false;
+	int idx;
+
+	if (q->flags & I2C_AQ_COMB) {
+		/* Combined-transfer adapters handle at most two messages */
+		msg_limit = 2;
+
+		/* special checks for combined messages */
+		if (num == 2) {
+			struct i2c_msg *first = &msgs[0];
+			struct i2c_msg *second = &msgs[1];
+
+			if ((q->flags & I2C_AQ_COMB_WRITE_FIRST) && (first->flags & I2C_M_RD))
+				return i2c_quirk_error(adap, first, "1st comb msg must be write");
+
+			if ((q->flags & I2C_AQ_COMB_READ_SECOND) && !(second->flags & I2C_M_RD))
+				return i2c_quirk_error(adap, second, "2nd comb msg must be read");
+
+			if ((q->flags & I2C_AQ_COMB_SAME_ADDR) && first->addr != second->addr)
+				return i2c_quirk_error(adap, first, "comb msg only to same addr");
+
+			if (i2c_quirk_exceeded(first->len, q->max_comb_1st_msg_len))
+				return i2c_quirk_error(adap, first, "msg too long");
+
+			if (i2c_quirk_exceeded(second->len, q->max_comb_2nd_msg_len))
+				return i2c_quirk_error(adap, second, "msg too long");
+
+			comb_checked = true;
+		}
+	}
+
+	if (i2c_quirk_exceeded(num, msg_limit))
+		return i2c_quirk_error(adap, &msgs[0], "too many messages");
+
+	/* Lengths of a combined pair were already checked above */
+	if (comb_checked)
+		return 0;
+
+	for (idx = 0; idx < num; idx++) {
+		struct i2c_msg *m = &msgs[idx];
+		u16 len = m->len;
+		u16 len_limit;
+
+		if (m->flags & I2C_M_RD)
+			len_limit = q->max_read_len;
+		else
+			len_limit = q->max_write_len;
+
+		if (i2c_quirk_exceeded(len, len_limit))
+			return i2c_quirk_error(adap, m, "msg too long");
+	}
+
+	return 0;
+}
+
+/**
+ * __i2c_transfer - unlocked flavor of i2c_transfer
+ * @adap: Handle to I2C bus
+ * @msgs: One or more messages to execute before STOP is issued to
+ *	terminate the operation; each message begins with a START.
+ * @num: Number of messages to be executed.
+ *
+ * Returns negative errno, else the number of messages executed.  A NULL
+ * @msgs or a @num below 1 is rejected with -EINVAL (and a warning); a
+ * transfer violating the adapter's quirks is rejected with -EOPNOTSUPP.
+ *
+ * Adapter lock must be held when calling this function. No debug logging
+ * takes place. adap->algo->master_xfer existence isn't checked.
+ */
+int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
+{
+	unsigned long orig_jiffies;
+	int ret, try;
+
+	/*
+	 * The quirk checker and the tracing below index msgs[] directly,
+	 * so never let an empty or missing message array get that far.
+	 */
+	if (WARN_ON(!msgs || num < 1))
+		return -EINVAL;
+
+	if (adap->quirks && i2c_check_for_quirks(adap, msgs, num))
+		return -EOPNOTSUPP;
+
+	/* i2c_trace_msg gets enabled when tracepoint i2c_transfer gets
+	 * enabled.  This is an efficient way of keeping the for-loop from
+	 * being executed when not needed.
+	 */
+	if (static_key_false(&i2c_trace_msg)) {
+		int i;
+		for (i = 0; i < num; i++)
+			if (msgs[i].flags & I2C_M_RD)
+				trace_i2c_read(adap, &msgs[i], i);
+			else
+				trace_i2c_write(adap, &msgs[i], i);
+	}
+
+	/* Retry automatically on arbitration loss */
+	orig_jiffies = jiffies;
+	for (ret = 0, try = 0; try <= adap->retries; try++) {
+		ret = adap->algo->master_xfer(adap, msgs, num);
+		if (ret != -EAGAIN)
+			break;
+		/* Give up retrying once the adapter timeout has elapsed */
+		if (time_after(jiffies, orig_jiffies + adap->timeout))
+			break;
+	}
+
+	if (static_key_false(&i2c_trace_msg)) {
+		int i;
+		/* Only the messages reported as executed have reply data */
+		for (i = 0; i < ret; i++)
+			if (msgs[i].flags & I2C_M_RD)
+				trace_i2c_reply(adap, &msgs[i], i);
+		trace_i2c_result(adap, i, ret);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(__i2c_transfer);
+
+/**
+ * i2c_transfer - execute a single or combined I2C message
+ * @adap: Handle to I2C bus
+ * @msgs: One or more messages to execute before STOP is issued to
+ *	terminate the operation; each message begins with a START.
+ * @num: Number of messages to be executed.
+ *
+ * Returns negative errno, else the number of messages executed.
+ *
+ * Note that there is no requirement that each message be sent to
+ * the same slave address, although that is the most common model.
+ */
+int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
+{
+	int ret;
+
+	/* REVISIT the fault reporting model here is weak:
+	 *
+	 *  - When we get an error after receiving N bytes from a slave,
+	 *    there is no way to report "N".
+	 *
+	 *  - When we get a NAK after transmitting N bytes to a slave,
+	 *    there is no way to report "N" ... or to let the master
+	 *    continue executing the rest of this combined message, if
+	 *    that's the appropriate response.
+	 *
+	 *  - When for example "num" is two and we successfully complete
+	 *    the first message but get an error part way through the
+	 *    second, it's unclear whether that should be reported as
+	 *    one (discarding status on the second message) or errno
+	 *    (discarding status on the first one).
+	 */
+
+	/* Adapters without master_xfer cannot do plain I2C transfers */
+	if (!adap->algo->master_xfer) {
+		dev_dbg(&adap->dev, "I2C level transfers not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+#ifdef DEBUG
+	for (ret = 0; ret < num; ret++) {
+		dev_dbg(&adap->dev,
+			"master_xfer[%d] %c, addr=0x%02x, len=%d%s\n",
+			ret, (msgs[ret].flags & I2C_M_RD) ? 'R' : 'W',
+			msgs[ret].addr, msgs[ret].len,
+			(msgs[ret].flags & I2C_M_RECV_LEN) ? "+" : "");
+	}
+#endif
+
+	/* Where sleeping is not allowed, only try to take the bus lock */
+	if (in_atomic() || irqs_disabled()) {
+		if (!i2c_trylock_bus(adap, I2C_LOCK_SEGMENT))
+			/* I2C activity is ongoing. */
+			return -EAGAIN;
+	} else {
+		i2c_lock_bus(adap, I2C_LOCK_SEGMENT);
+	}
+
+	ret = __i2c_transfer(adap, msgs, num);
+	i2c_unlock_bus(adap, I2C_LOCK_SEGMENT);
+
+	return ret;
+}
+EXPORT_SYMBOL(i2c_transfer);
+
+/**
+ * i2c_master_send - issue a single I2C message in master transmit mode
+ * @client: Handle to slave device
+ * @buf: Data that will be written to the slave
+ * @count: How many bytes to write, must be less than 64k since msg.len is u16
+ *
+ * Returns negative errno, or else the number of bytes written.
+ */
+int i2c_master_send(const struct i2c_client *client, const char *buf, int count)
+{
+	struct i2c_msg msg = {
+		.addr = client->addr,
+		.flags = client->flags & I2C_M_TEN,
+		.len = count,
+		.buf = (char *)buf,
+	};
+	int ret = i2c_transfer(client->adapter, &msg, 1);
+
+	/* Anything other than "1 message transmitted" is passed on as is */
+	if (ret != 1)
+		return ret;
+
+	return count;
+}
+EXPORT_SYMBOL(i2c_master_send);
+
+/**
+ * i2c_master_recv - issue a single I2C message in master receive mode
+ * @client: Handle to slave device
+ * @buf: Where to store data read from slave
+ * @count: How many bytes to read, must be less than 64k since msg.len is u16
+ *
+ * Returns negative errno, or else the number of bytes read.
+ */
+int i2c_master_recv(const struct i2c_client *client, char *buf, int count)
+{
+	struct i2c_msg msg = {
+		.addr = client->addr,
+		.flags = (client->flags & I2C_M_TEN) | I2C_M_RD,
+		.len = count,
+		.buf = buf,
+	};
+	int ret = i2c_transfer(client->adapter, &msg, 1);
+
+	/* Anything other than "1 message received" is passed on as is */
+	if (ret != 1)
+		return ret;
+
+	return count;
+}
+EXPORT_SYMBOL(i2c_master_recv);
+
+/* ----------------------------------------------------
+ * the i2c address scanning function
+ * Will not work for 10-bit addresses!
+ * ----------------------------------------------------
+ */
+
+/*
+ * Legacy default probe function, mostly relevant for SMBus. The default
+ * probe method is a quick write, but it is known to corrupt the 24RF08
+ * EEPROMs due to a state machine bug, and could also irreversibly
+ * write-protect some EEPROMs, so for address ranges 0x30-0x37 and 0x50-0x5f,
+ * we use a short byte read instead. Also, some bus drivers don't implement
+ * quick write, so we fallback to a byte read in that case too.
+ * On x86, there is another special case for FSC hardware monitoring chips,
+ * which want regular byte reads (address 0x73.) Fortunately, these are the
+ * only known chips using this I2C address on PC hardware.
+ * Returns 1 if probe succeeded, 0 if not.
+ */
+static int i2c_default_probe(struct i2c_adapter *adap, unsigned short addr)
+{
+	int err;
+	union i2c_smbus_data dummy;
+
+	/*
+	 * Note the dangling "else" inside the #ifdef: on x86 the chain below
+	 * becomes the else-branch of the 0x73 special case.
+	 */
+#ifdef CONFIG_X86
+	if (addr == 0x73 && (adap->class & I2C_CLASS_HWMON)
+	 && i2c_check_functionality(adap, I2C_FUNC_SMBUS_READ_BYTE_DATA))
+		err = i2c_smbus_xfer(adap, addr, 0, I2C_SMBUS_READ, 0,
+				     I2C_SMBUS_BYTE_DATA, &dummy);
+	else
+#endif
+	/* Quick write, except in the 0x30-0x37 and 0x50-0x5f ranges */
+	if (!((addr & ~0x07) == 0x30 || (addr & ~0x0f) == 0x50)
+	 && i2c_check_functionality(adap, I2C_FUNC_SMBUS_QUICK))
+		err = i2c_smbus_xfer(adap, addr, 0, I2C_SMBUS_WRITE, 0,
+				     I2C_SMBUS_QUICK, NULL);
+	else if (i2c_check_functionality(adap, I2C_FUNC_SMBUS_READ_BYTE))
+		err = i2c_smbus_xfer(adap, addr, 0, I2C_SMBUS_READ, 0,
+				     I2C_SMBUS_BYTE, &dummy);
+	else {
+		dev_warn(&adap->dev, "No suitable probing method supported for address 0x%02X\n",
+			 addr);
+		err = -EOPNOTSUPP;
+	}
+
+	/* Any non-negative transfer result counts as "device present" */
+	return err >= 0;
+}
+
+/*
+ * Probe the single address held in @temp_client on behalf of @driver and,
+ * if the driver's detect callback recognises a chip there, instantiate it
+ * and link it into driver->clients.
+ *
+ * Returns 0 when the address was handled (device created or not), or a
+ * negative errno for an invalid address or a detect failure other than
+ * -ENODEV.
+ */
+static int i2c_detect_address(struct i2c_client *temp_client,
+			      struct i2c_driver *driver)
+{
+	struct i2c_adapter *adapter = temp_client->adapter;
+	struct i2c_board_info info;
+	struct i2c_client *created;
+	int addr = temp_client->addr;
+	int err;
+
+	/* Make sure the address is valid */
+	err = i2c_check_7bit_addr_validity_strict(addr);
+	if (err) {
+		dev_warn(&adapter->dev, "Invalid probe address 0x%02x\n",
+			 addr);
+		return err;
+	}
+
+	/*
+	 * Skip if already in use (7 bit, no need to encode flags), or if
+	 * nothing answers at this address.
+	 */
+	if (i2c_check_addr_busy(adapter, addr) ||
+	    !i2c_default_probe(adapter, addr))
+		return 0;
+
+	/* Finally call the custom detection function */
+	memset(&info, 0, sizeof(struct i2c_board_info));
+	info.addr = addr;
+	err = driver->detect(temp_client, &info);
+
+	/* -ENODEV means "not my chip", which isn't an error here */
+	if (err == -ENODEV)
+		return 0;
+	if (err)
+		return err;
+
+	/* Consistency check */
+	if (info.type[0] == '\0') {
+		dev_err(&adapter->dev,
+			"%s detection function provided no name for 0x%x\n",
+			driver->driver.name, addr);
+		return 0;
+	}
+
+	/* Detection succeeded, instantiate the device */
+	if (adapter->class & I2C_CLASS_DEPRECATED)
+		dev_warn(&adapter->dev,
+			"This adapter will soon drop class based instantiation of devices. "
+			"Please make sure client 0x%02x gets instantiated by other means. "
+			"Check 'Documentation/i2c/instantiating-devices' for details.\n",
+			info.addr);
+
+	dev_dbg(&adapter->dev, "Creating %s at 0x%02x\n",
+		info.type, info.addr);
+
+	created = i2c_new_device(adapter, &info);
+	if (!created) {
+		dev_err(&adapter->dev, "Failed creating %s at 0x%02x\n",
+			info.type, info.addr);
+		return 0;
+	}
+
+	list_add_tail(&created->detected, &driver->clients);
+	return 0;
+}
+
+/*
+ * Run class-based detection of @driver on @adapter: every address of the
+ * driver's address_list is tried through i2c_detect_address() until the
+ * list ends or an error is reported.
+ *
+ * Returns 0 when there was nothing to do or all addresses were handled,
+ * -ENOMEM if the temporary client could not be allocated, or the error
+ * that interrupted the scan.
+ */
+static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver)
+{
+	const unsigned short *address_list = driver->address_list;
+	const unsigned short *cur;
+	struct i2c_client *probe_client;
+	int adap_id = i2c_adapter_id(adapter);
+	int err = 0;
+
+	/* Detection needs both a callback and addresses to try */
+	if (!driver->detect || !address_list)
+		return 0;
+
+	/* Warn that the adapter lost class based instantiation */
+	if (adapter->class == I2C_CLASS_DEPRECATED) {
+		dev_dbg(&adapter->dev,
+			"This adapter dropped support for I2C classes and won't auto-detect %s devices anymore. "
+			"If you need it, check 'Documentation/i2c/instantiating-devices' for alternatives.\n",
+			driver->driver.name);
+		return 0;
+	}
+
+	/* Stop here if the classes do not match */
+	if (!(adapter->class & driver->class))
+		return 0;
+
+	/* Set up a temporary client to help detect callback */
+	probe_client = kzalloc(sizeof(struct i2c_client), GFP_KERNEL);
+	if (!probe_client)
+		return -ENOMEM;
+	probe_client->adapter = adapter;
+
+	for (cur = address_list; *cur != I2C_CLIENT_END; cur++) {
+		dev_dbg(&adapter->dev,
+			"found normal entry for adapter %d, addr 0x%02x\n",
+			adap_id, *cur);
+		probe_client->addr = *cur;
+		err = i2c_detect_address(probe_client, driver);
+		if (unlikely(err))
+			break;
+	}
+
+	kfree(probe_client);
+	return err;
+}
+
+/*
+ * Probe callback issuing an SMBus quick read at @addr.  Returns 1 if the
+ * transfer did not fail, 0 otherwise.
+ */
+int i2c_probe_func_quick_read(struct i2c_adapter *adap, unsigned short addr)
+{
+	int err = i2c_smbus_xfer(adap, addr, 0, I2C_SMBUS_READ, 0,
+				 I2C_SMBUS_QUICK, NULL);
+
+	return err >= 0;
+}
+EXPORT_SYMBOL_GPL(i2c_probe_func_quick_read);
+
+/*
+ * Try the addresses of @addr_list (terminated by I2C_CLIENT_END) in order
+ * and instantiate @info at the first valid, unused address where @probe
+ * reports a device.  A NULL @probe selects i2c_default_probe().
+ *
+ * Returns the result of i2c_new_device(), or NULL if no address responded.
+ */
+struct i2c_client *
+i2c_new_probed_device(struct i2c_adapter *adap,
+		      struct i2c_board_info *info,
+		      unsigned short const *addr_list,
+		      int (*probe)(struct i2c_adapter *, unsigned short addr))
+{
+	int idx;
+
+	if (!probe)
+		probe = i2c_default_probe;
+
+	for (idx = 0; addr_list[idx] != I2C_CLIENT_END; idx++) {
+		unsigned short candidate = addr_list[idx];
+
+		/* Check address validity */
+		if (i2c_check_7bit_addr_validity_strict(candidate) < 0) {
+			dev_warn(&adap->dev, "Invalid 7-bit address 0x%02x\n",
+				 candidate);
+			continue;
+		}
+
+		/* Check address availability (7 bit, no need to encode flags) */
+		if (i2c_check_addr_busy(adap, candidate)) {
+			dev_dbg(&adap->dev,
+				"Address 0x%02x already in use, not probing\n",
+				candidate);
+			continue;
+		}
+
+		/* Test address responsiveness */
+		if (!probe(adap, candidate))
+			continue;
+
+		/* First responsive address wins */
+		info->addr = candidate;
+		return i2c_new_device(adap, info);
+	}
+
+	dev_dbg(&adap->dev, "Probing failed, no device found\n");
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(i2c_new_probed_device);
+
+/*
+ * Look up the adapter registered under bus number @nr and take a reference
+ * on both its owning module and its device.  Returns NULL if no such
+ * adapter exists or the module reference could not be taken.  Release the
+ * result with i2c_put_adapter().
+ */
+struct i2c_adapter *i2c_get_adapter(int nr)
+{
+	struct i2c_adapter *found;
+
+	mutex_lock(&core_lock);
+	found = idr_find(&i2c_adapter_idr, nr);
+	if (found) {
+		/* The device reference is only taken once the module is pinned */
+		if (try_module_get(found->owner))
+			get_device(&found->dev);
+		else
+			found = NULL;
+	}
+	mutex_unlock(&core_lock);
+
+	return found;
+}
+EXPORT_SYMBOL(i2c_get_adapter);
+
+/*
+ * Drop the device and module references of @adap, as taken by
+ * i2c_get_adapter().  A NULL @adap is accepted and ignored.
+ */
+void i2c_put_adapter(struct i2c_adapter *adap)
+{
+	if (adap) {
+		put_device(&adap->dev);
+		module_put(adap->owner);
+	}
+}
+EXPORT_SYMBOL(i2c_put_adapter);
+
+MODULE_AUTHOR("Simon G. Vogl <simon@tk.uni-linz.ac.at>");
+MODULE_DESCRIPTION("I2C-Bus main module");
+MODULE_LICENSE("GPL");

diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c
new file mode 100644
index 0000000..ccf82fd
--- /dev/null
+++ b/drivers/i2c/i2c-core-of.c

@@ -0,0 +1,276 @@
+/*
+ * Linux I2C core OF support code
+ *
+ * Copyright (C) 2008 Jochen Friedrich <jochen@scram.de>
+ * based on a previous patch from Jon Smirl <jonsmirl@gmail.com>
+ *
+ * Copyright (C) 2013 Wolfram Sang <wsa@the-dreams.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <dt-bindings/i2c/i2c.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include "i2c-core.h"
+
+static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap,
+						 struct device_node *node)
+{
+	struct i2c_client *result;
+	struct i2c_board_info info = {};
+	struct dev_archdata dev_ad = {};
+	const __be32 *addr_be;
+	u32 addr;
+	int len;
+
+	dev_dbg(&adap->dev, "of_i2c: register %s\n", node->full_name);
+
+	if (of_modalias_node(node, info.type, sizeof(info.type)) < 0) {
+		dev_err(&adap->dev, "of_i2c: modalias failure on %s\n",
+			node->full_name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	addr_be = of_get_property(node, "reg", &len);
+	if (!addr_be || (len < sizeof(*addr_be))) {
+		dev_err(&adap->dev, "of_i2c: invalid reg on %s\n",
+			node->full_name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	addr = be32_to_cpup(addr_be);
+	if (addr & I2C_TEN_BIT_ADDRESS) {
+		addr &= ~I2C_TEN_BIT_ADDRESS;
+		info.flags |= I2C_CLIENT_TEN;
+	}
+
+	if (addr & I2C_OWN_SLAVE_ADDRESS) {
+		addr &= ~I2C_OWN_SLAVE_ADDRESS;
+		info.flags |= I2C_CLIENT_SLAVE;
+	}
+
+	if (i2c_check_addr_validity(addr, info.flags)) {
+		dev_err(&adap->dev, "of_i2c: invalid addr=%x on %s\n",
+			addr, node->full_name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	info.addr = addr;
+	info.of_node = of_node_get(node);
+	info.archdata = &dev_ad;
+
+	if (of_property_read_bool(node, "host-notify"))
+		info.flags |= I2C_CLIENT_HOST_NOTIFY;
+
+	if (of_get_property(node, "wakeup-source", NULL))
+		info.flags |= I2C_CLIENT_WAKE;
+
+	result = i2c_new_device(adap, &info);
+	if (result == NULL) {
+		dev_err(&adap->dev, "of_i2c: Failure registering %s\n",
+			node->full_name);
+		of_node_put(node);
+		return ERR_PTR(-EINVAL);
+	}
+	return result;
+}
+
+void of_i2c_register_devices(struct i2c_adapter *adap)
+{
+	struct device_node *bus, *node;
+	struct i2c_client *client;
+
+	/* Only register child devices if the adapter has a node pointer set */
+	if (!adap->dev.of_node)
+		return;
+
+	dev_dbg(&adap->dev, "of_i2c: walking child nodes\n");
+
+	bus = of_get_child_by_name(adap->dev.of_node, "i2c-bus");
+	if (!bus)
+		bus = of_node_get(adap->dev.of_node);
+
+	for_each_available_child_of_node(bus, node) {
+		if (of_node_test_and_set_flag(node, OF_POPULATED))
+			continue;
+
+		client = of_i2c_register_device(adap, node);
+		if (IS_ERR(client)) {
+			dev_warn(&adap->dev,
+				 "Failed to create I2C device for %s\n",
+				 node->full_name);
+			of_node_clear_flag(node, OF_POPULATED);
+		}
+	}
+
+	of_node_put(bus);
+}
+
+static int of_dev_node_match(struct device *dev, void *data)
+{
+	return dev->of_node == data;
+}
+
+/* must call put_device() when done with returned i2c_client device */
+struct i2c_client *of_find_i2c_device_by_node(struct device_node *node)
+{
+	struct device *dev;
+	struct i2c_client *client;
+
+	dev = bus_find_device(&i2c_bus_type, NULL, node, of_dev_node_match);
+	if (!dev)
+		return NULL;
+
+	client = i2c_verify_client(dev);
+	if (!client)
+		put_device(dev);
+
+	return client;
+}
+EXPORT_SYMBOL(of_find_i2c_device_by_node);
+
+/* must call put_device() when done with returned i2c_adapter device */
+struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node)
+{
+	struct device *dev;
+	struct i2c_adapter *adapter;
+
+	dev = bus_find_device(&i2c_bus_type, NULL, node, of_dev_node_match);
+	if (!dev)
+		return NULL;
+
+	adapter = i2c_verify_adapter(dev);
+	if (!adapter)
+		put_device(dev);
+
+	return adapter;
+}
+EXPORT_SYMBOL(of_find_i2c_adapter_by_node);
+
+/* must call i2c_put_adapter() when done with returned i2c_adapter device */
+struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node)
+{
+	struct i2c_adapter *adapter;
+
+	adapter = of_find_i2c_adapter_by_node(node);
+	if (!adapter)
+		return NULL;
+
+	if (!try_module_get(adapter->owner)) {
+		put_device(&adapter->dev);
+		adapter = NULL;
+	}
+
+	return adapter;
+}
+EXPORT_SYMBOL(of_get_i2c_adapter_by_node);
+
+static const struct of_device_id*
+i2c_of_match_device_sysfs(const struct of_device_id *matches,
+				  struct i2c_client *client)
+{
+	const char *name;
+
+	for (; matches->compatible[0]; matches++) {
+		/*
+		 * Adding devices through the i2c sysfs interface provides us
+		 * a string to match which may be compatible with the device
+		 * tree compatible strings, however with no actual of_node the
+		 * of_match_device() will not match
+		 */
+		if (sysfs_streq(client->name, matches->compatible))
+			return matches;
+
+		name = strchr(matches->compatible, ',');
+		if (!name)
+			name = matches->compatible;
+		else
+			name++;
+
+		if (sysfs_streq(client->name, name))
+			return matches;
+	}
+
+	return NULL;
+}
+
+const struct of_device_id
+*i2c_of_match_device(const struct of_device_id *matches,
+		     struct i2c_client *client)
+{
+	const struct of_device_id *match;
+
+	if (!(client && matches))
+		return NULL;
+
+	match = of_match_device(matches, &client->dev);
+	if (match)
+		return match;
+
+	return i2c_of_match_device_sysfs(matches, client);
+}
+EXPORT_SYMBOL_GPL(i2c_of_match_device);
+
+#if IS_ENABLED(CONFIG_OF_DYNAMIC)
+/* OF reconfig notifier: (un)register the i2c client of a changed node */
+static int of_i2c_notify(struct notifier_block *nb, unsigned long action,
+			 void *arg)
+{
+	struct of_reconfig_data *rd = arg;
+	struct i2c_adapter *adap;
+	struct i2c_client *client;
+
+	switch (of_reconfig_get_state_change(action, rd)) {
+	case OF_RECONFIG_CHANGE_ADD:
+		adap = of_find_i2c_adapter_by_node(rd->dn->parent);
+		if (adap == NULL)
+			return NOTIFY_OK;	/* not for us */
+
+		if (of_node_test_and_set_flag(rd->dn, OF_POPULATED)) {
+			put_device(&adap->dev);
+			return NOTIFY_OK;
+		}
+
+		client = of_i2c_register_device(adap, rd->dn);
+		if (IS_ERR(client)) {
+			dev_err(&adap->dev, "failed to create client for '%s'\n",
+				 rd->dn->full_name);
+			put_device(&adap->dev);
+			of_node_clear_flag(rd->dn, OF_POPULATED);
+			return notifier_from_errno(PTR_ERR(client));
+		}
+		put_device(&adap->dev);	/* only after the last use of adap */
+		break;
+	case OF_RECONFIG_CHANGE_REMOVE:
+		if (!of_node_check_flag(rd->dn, OF_POPULATED))
+			return NOTIFY_OK;	/* already depopulated */
+
+		/* find our device by node */
+		client = of_find_i2c_device_by_node(rd->dn);
+		if (client == NULL)
+			return NOTIFY_OK;	/* no? not meant for us */
+
+		/* unregister takes one ref away */
+		i2c_unregister_device(client);
+
+		/* and put the reference of the find */
+		put_device(&client->dev);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+struct notifier_block i2c_of_notifier = {
+	.notifier_call = of_i2c_notify,
+};
+#endif /* CONFIG_OF_DYNAMIC */

diff --git a/drivers/i2c/i2c-core-slave.c b/drivers/i2c/i2c-core-slave.c
new file mode 100644
index 0000000..4a78c65
--- /dev/null
+++ b/drivers/i2c/i2c-core-slave.c

@@ -0,0 +1,115 @@
+/*
+ * Linux I2C core slave support code
+ *
+ * Copyright (C) 2014 by Wolfram Sang <wsa@sang-engineering.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <dt-bindings/i2c/i2c.h>
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/of.h>
+
+#include "i2c-core.h"
+
+int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb)
+{
+	int ret;
+
+	if (!client || !slave_cb) {
+		WARN(1, "insufficient data\n");
+		return -EINVAL;
+	}
+
+	if (!(client->flags & I2C_CLIENT_SLAVE))
+		dev_warn(&client->dev, "%s: client slave flag not set. You might see address collisions\n",
+			 __func__);
+
+	if (!(client->flags & I2C_CLIENT_TEN)) {
+		/* Enforce stricter address checking */
+		ret = i2c_check_7bit_addr_validity_strict(client->addr);
+		if (ret) {
+			dev_err(&client->dev, "%s: invalid address\n", __func__);
+			return ret;
+		}
+	}
+
+	if (!client->adapter->algo->reg_slave) {
+		dev_err(&client->dev, "%s: not supported by adapter\n", __func__);
+		return -EOPNOTSUPP;
+	}
+
+	client->slave_cb = slave_cb;
+
+	i2c_lock_adapter(client->adapter);
+	ret = client->adapter->algo->reg_slave(client);
+	i2c_unlock_adapter(client->adapter);
+
+	if (ret) {
+		client->slave_cb = NULL;
+		dev_err(&client->dev, "%s: adapter returned error %d\n", __func__, ret);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i2c_slave_register);
+
+int i2c_slave_unregister(struct i2c_client *client)
+{
+	int ret;
+
+	if (!client->adapter->algo->unreg_slave) {
+		dev_err(&client->dev, "%s: not supported by adapter\n", __func__);
+		return -EOPNOTSUPP;
+	}
+
+	i2c_lock_adapter(client->adapter);
+	ret = client->adapter->algo->unreg_slave(client);
+	i2c_unlock_adapter(client->adapter);
+
+	if (ret == 0)
+		client->slave_cb = NULL;
+	else
+		dev_err(&client->dev, "%s: adapter returned error %d\n", __func__, ret);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i2c_slave_unregister);
+
+/**
+ * i2c_detect_slave_mode - detect operation mode
+ * @dev: The device owning the bus
+ *
+ * This checks the child device nodes for an I2C slave by looking at the
+ * address in their reg property. If an address has the I2C_OWN_SLAVE_ADDRESS
+ * flag set, the device is configured to act as an I2C slave listening at that
+ * address. Children whose reg property cannot be read are skipped.
+ *
+ * Returns true if an I2C own slave address is detected, otherwise returns
+ * false.
+ */
+bool i2c_detect_slave_mode(struct device *dev)
+{
+	if (IS_BUILTIN(CONFIG_OF) && dev->of_node) {
+		struct device_node *child;
+		u32 reg;
+
+		for_each_child_of_node(dev->of_node, child) {
+			if (!of_property_read_u32(child, "reg", &reg) &&
+			    (reg & I2C_OWN_SLAVE_ADDRESS)) {
+				of_node_put(child);
+				return true;
+			}
+		}
+	} else if (IS_BUILTIN(CONFIG_ACPI) && ACPI_HANDLE(dev)) {
+		dev_dbg(dev, "ACPI slave is not supported yet\n");
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(i2c_detect_slave_mode);

diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c
new file mode 100644
index 0000000..10f00a8
--- /dev/null
+++ b/drivers/i2c/i2c-core-smbus.c

@@ -0,0 +1,594 @@
+/*
+ * Linux I2C core SMBus and SMBus emulation code
+ *
+ * This file contains the SMBus functions which are always included in the I2C
+ * core because they can be emulated via I2C. SMBus specific extensions
+ * (e.g. smbalert) are handled in a separate i2c-smbus module.
+ *
+ * All SMBus-related things are written by Frodo Looijaard <frodol@dds.nl>
+ * SMBus 2.0 support by Mark Studebaker <mdsxyz123@yahoo.com> and
+ * Jean Delvare <jdelvare@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/smbus.h>
+
+
+/* The SMBus parts */
+
+#define POLY    (0x1070U << 3)
+static u8 crc8(u16 data)
+{
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		if (data & 0x8000)
+			data = data ^ POLY;
+		data = data << 1;
+	}
+	return (u8)(data >> 8);
+}
+
+/* Incremental CRC8 over count bytes in the array pointed to by p */
+static u8 i2c_smbus_pec(u8 crc, u8 *p, size_t count)
+{
+	int i;
+
+	for (i = 0; i < count; i++)
+		crc = crc8((crc ^ p[i]) << 8);
+	return crc;
+}
+
+/* Assume a 7-bit address, which is reasonable for SMBus */
+static u8 i2c_smbus_msg_pec(u8 pec, struct i2c_msg *msg)
+{
+	/* The address will be sent first */
+	u8 addr = i2c_8bit_addr_from_msg(msg);
+	pec = i2c_smbus_pec(pec, &addr, 1);
+
+	/* The data buffer follows */
+	return i2c_smbus_pec(pec, msg->buf, msg->len);
+}
+
+/* Used for write only transactions */
+static inline void i2c_smbus_add_pec(struct i2c_msg *msg)
+{
+	msg->buf[msg->len] = i2c_smbus_msg_pec(0, msg);
+	msg->len++;
+}
+
+/* Return <0 on CRC error
+   If there was a write before this read (most cases) we need to take the
+   partial CRC from the write part into account.
+   Note that this function does modify the message (we need to decrease the
+   message length to hide the CRC byte from the caller). */
+static int i2c_smbus_check_pec(u8 cpec, struct i2c_msg *msg)
+{
+	u8 rpec = msg->buf[--msg->len];
+	cpec = i2c_smbus_msg_pec(cpec, msg);
+
+	if (rpec != cpec) {
+		pr_debug("Bad PEC 0x%02x vs. 0x%02x\n",
+			rpec, cpec);
+		return -EBADMSG;
+	}
+	return 0;
+}
+
+/**
+ * i2c_smbus_read_byte - SMBus "receive byte" protocol
+ * @client: Handle to slave device
+ *
+ * This executes the SMBus "receive byte" protocol, returning negative errno
+ * else the byte received from the device.
+ */
+s32 i2c_smbus_read_byte(const struct i2c_client *client)
+{
+	union i2c_smbus_data data;
+	int status;
+
+	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+				I2C_SMBUS_READ, 0,
+				I2C_SMBUS_BYTE, &data);
+	return (status < 0) ? status : data.byte;
+}
+EXPORT_SYMBOL(i2c_smbus_read_byte);
+
+/**
+ * i2c_smbus_write_byte - SMBus "send byte" protocol
+ * @client: Handle to slave device
+ * @value: Byte to be sent
+ *
+ * This executes the SMBus "send byte" protocol, returning negative errno
+ * else zero on success.
+ */
+s32 i2c_smbus_write_byte(const struct i2c_client *client, u8 value)
+{
+	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+	                      I2C_SMBUS_WRITE, value, I2C_SMBUS_BYTE, NULL);
+}
+EXPORT_SYMBOL(i2c_smbus_write_byte);
+
+/**
+ * i2c_smbus_read_byte_data - SMBus "read byte" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ *
+ * This executes the SMBus "read byte" protocol, returning negative errno
+ * else a data byte received from the device.
+ */
+s32 i2c_smbus_read_byte_data(const struct i2c_client *client, u8 command)
+{
+	union i2c_smbus_data data;
+	int status;
+
+	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+				I2C_SMBUS_READ, command,
+				I2C_SMBUS_BYTE_DATA, &data);
+	return (status < 0) ? status : data.byte;
+}
+EXPORT_SYMBOL(i2c_smbus_read_byte_data);
+
+/**
+ * i2c_smbus_write_byte_data - SMBus "write byte" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ * @value: Byte being written
+ *
+ * This executes the SMBus "write byte" protocol, returning negative errno
+ * else zero on success.
+ */
+s32 i2c_smbus_write_byte_data(const struct i2c_client *client, u8 command,
+			      u8 value)
+{
+	union i2c_smbus_data data;
+	data.byte = value;
+	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+			      I2C_SMBUS_WRITE, command,
+			      I2C_SMBUS_BYTE_DATA, &data);
+}
+EXPORT_SYMBOL(i2c_smbus_write_byte_data);
+
+/**
+ * i2c_smbus_read_word_data - SMBus "read word" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ *
+ * This executes the SMBus "read word" protocol, returning negative errno
+ * else a 16-bit unsigned "word" received from the device.
+ */
+s32 i2c_smbus_read_word_data(const struct i2c_client *client, u8 command)
+{
+	union i2c_smbus_data data;
+	int status;
+
+	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+				I2C_SMBUS_READ, command,
+				I2C_SMBUS_WORD_DATA, &data);
+	return (status < 0) ? status : data.word;
+}
+EXPORT_SYMBOL(i2c_smbus_read_word_data);
+
+/**
+ * i2c_smbus_write_word_data - SMBus "write word" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ * @value: 16-bit "word" being written
+ *
+ * This executes the SMBus "write word" protocol, returning negative errno
+ * else zero on success.
+ */
+s32 i2c_smbus_write_word_data(const struct i2c_client *client, u8 command,
+			      u16 value)
+{
+	union i2c_smbus_data data;
+	data.word = value;
+	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+			      I2C_SMBUS_WRITE, command,
+			      I2C_SMBUS_WORD_DATA, &data);
+}
+EXPORT_SYMBOL(i2c_smbus_write_word_data);
+
+/**
+ * i2c_smbus_read_block_data - SMBus "block read" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ * @values: Byte array into which data will be read; big enough to hold
+ *	the data returned by the slave.  SMBus allows at most 32 bytes.
+ *
+ * This executes the SMBus "block read" protocol, returning negative errno
+ * else the number of data bytes in the slave's response.
+ *
+ * Note that using this function requires that the client's adapter support
+ * the I2C_FUNC_SMBUS_READ_BLOCK_DATA functionality.  Not all adapter drivers
+ * support this; its emulation through I2C messaging relies on a specific
+ * mechanism (I2C_M_RECV_LEN) which may not be implemented.
+ */
+s32 i2c_smbus_read_block_data(const struct i2c_client *client, u8 command,
+			      u8 *values)
+{
+	union i2c_smbus_data data;
+	int status;
+
+	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+				I2C_SMBUS_READ, command,
+				I2C_SMBUS_BLOCK_DATA, &data);
+	if (status)
+		return status;
+
+	memcpy(values, &data.block[1], data.block[0]);
+	return data.block[0];
+}
+EXPORT_SYMBOL(i2c_smbus_read_block_data);
+
+/**
+ * i2c_smbus_write_block_data - SMBus "block write" protocol
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ * @length: Size of data block; SMBus allows at most 32 bytes
+ * @values: Byte array which will be written.
+ *
+ * This executes the SMBus "block write" protocol, returning negative errno
+ * else zero on success.
+ */
+s32 i2c_smbus_write_block_data(const struct i2c_client *client, u8 command,
+			       u8 length, const u8 *values)
+{
+	union i2c_smbus_data data;
+
+	if (length > I2C_SMBUS_BLOCK_MAX)
+		length = I2C_SMBUS_BLOCK_MAX;
+	data.block[0] = length;
+	memcpy(&data.block[1], values, length);
+	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+			      I2C_SMBUS_WRITE, command,
+			      I2C_SMBUS_BLOCK_DATA, &data);
+}
+EXPORT_SYMBOL(i2c_smbus_write_block_data);
+
+/* Returns the number of read bytes */
+s32 i2c_smbus_read_i2c_block_data(const struct i2c_client *client, u8 command,
+				  u8 length, u8 *values)
+{
+	union i2c_smbus_data data;
+	int status;
+
+	if (length > I2C_SMBUS_BLOCK_MAX)
+		length = I2C_SMBUS_BLOCK_MAX;
+	data.block[0] = length;
+	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+				I2C_SMBUS_READ, command,
+				I2C_SMBUS_I2C_BLOCK_DATA, &data);
+	if (status < 0)
+		return status;
+
+	memcpy(values, &data.block[1], data.block[0]);
+	return data.block[0];
+}
+EXPORT_SYMBOL(i2c_smbus_read_i2c_block_data);
+
+s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client, u8 command,
+				   u8 length, const u8 *values)
+{
+	union i2c_smbus_data data;
+
+	if (length > I2C_SMBUS_BLOCK_MAX)
+		length = I2C_SMBUS_BLOCK_MAX;
+	data.block[0] = length;
+	memcpy(data.block + 1, values, length);
+	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+			      I2C_SMBUS_WRITE, command,
+			      I2C_SMBUS_I2C_BLOCK_DATA, &data);
+}
+EXPORT_SYMBOL(i2c_smbus_write_i2c_block_data);
+
+/* Simulate a SMBus command using the i2c protocol
+   Apart from block lengths, no checking of parameters is done!  */
+static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr,
+				   unsigned short flags,
+				   char read_write, u8 command, int size,
+				   union i2c_smbus_data *data)
+{
+	/* So we need to generate a series of msgs. In the case of writing, we
+	  need to use only one message; when reading, we need two. We initialize
+	  most things with sane defaults, to keep the code below somewhat
+	  simpler. */
+	unsigned char msgbuf0[I2C_SMBUS_BLOCK_MAX+3];
+	unsigned char msgbuf1[I2C_SMBUS_BLOCK_MAX+2];
+	int num = read_write == I2C_SMBUS_READ ? 2 : 1;
+	int i;
+	u8 partial_pec = 0;
+	int status;
+	struct i2c_msg msg[2] = {
+		{
+			.addr = addr,
+			.flags = flags,
+			.len = 1,
+			.buf = msgbuf0,
+		}, {
+			.addr = addr,
+			.flags = flags | I2C_M_RD,
+			.len = 0,
+			.buf = msgbuf1,
+		},
+	};
+
+	msgbuf0[0] = command;
+	switch (size) {
+	case I2C_SMBUS_QUICK:
+		msg[0].len = 0;
+		/* Special case: The read/write field is used as data */
+		msg[0].flags = flags | (read_write == I2C_SMBUS_READ ?
+					I2C_M_RD : 0);
+		num = 1;
+		break;
+	case I2C_SMBUS_BYTE:
+		if (read_write == I2C_SMBUS_READ) {
+			/* Special case: only a read! */
+			msg[0].flags = I2C_M_RD | flags;
+			num = 1;
+		}
+		break;
+	case I2C_SMBUS_BYTE_DATA:
+		if (read_write == I2C_SMBUS_READ)
+			msg[1].len = 1;
+		else {
+			msg[0].len = 2;
+			msgbuf0[1] = data->byte;
+		}
+		break;
+	case I2C_SMBUS_WORD_DATA:
+		if (read_write == I2C_SMBUS_READ)
+			msg[1].len = 2;
+		else {
+			msg[0].len = 3;
+			msgbuf0[1] = data->word & 0xff;
+			msgbuf0[2] = data->word >> 8;
+		}
+		break;
+	case I2C_SMBUS_PROC_CALL:
+		num = 2; /* Special case */
+		read_write = I2C_SMBUS_READ;
+		msg[0].len = 3;
+		msg[1].len = 2;
+		msgbuf0[1] = data->word & 0xff;
+		msgbuf0[2] = data->word >> 8;
+		break;
+	case I2C_SMBUS_BLOCK_DATA:
+		if (read_write == I2C_SMBUS_READ) {
+			msg[1].flags |= I2C_M_RECV_LEN;
+			msg[1].len = 1; /* bus driver adds the block length */
+		} else {
+			msg[0].len = data->block[0] + 2;
+			if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 2) {
+				dev_err(&adapter->dev,
+					"Invalid block write size %d\n",
+					data->block[0]);
+				return -EINVAL;
+			}
+			for (i = 1; i < msg[0].len; i++)
+				msgbuf0[i] = data->block[i-1];
+		}
+		break;
+	case I2C_SMBUS_BLOCK_PROC_CALL:
+		num = 2; /* Another special case */
+		read_write = I2C_SMBUS_READ;
+		if (data->block[0] > I2C_SMBUS_BLOCK_MAX) {
+			dev_err(&adapter->dev,
+				"Invalid block write size %d\n",
+				data->block[0]);
+			return -EINVAL;
+		}
+		msg[0].len = data->block[0] + 2;
+		for (i = 1; i < msg[0].len; i++)
+			msgbuf0[i] = data->block[i-1];
+		msg[1].flags |= I2C_M_RECV_LEN;
+		msg[1].len = 1; /* bus driver adds the block length */
+		break;
+	case I2C_SMBUS_I2C_BLOCK_DATA:
+		if (data->block[0] > I2C_SMBUS_BLOCK_MAX) {
+			dev_err(&adapter->dev, "Invalid block %s size %d\n",
+				read_write == I2C_SMBUS_READ ? "read" : "write",
+				data->block[0]);
+			return -EINVAL;
+		}
+		if (read_write == I2C_SMBUS_READ) {
+			msg[1].len = data->block[0];
+		} else {
+			msg[0].len = data->block[0] + 1;
+			for (i = 1; i <= data->block[0]; i++)
+				msgbuf0[i] = data->block[i];
+		}
+		break;
+	default:
+		dev_err(&adapter->dev, "Unsupported transaction %d\n", size);
+		return -EOPNOTSUPP;
+	}
+
+	i = ((flags & I2C_CLIENT_PEC) && size != I2C_SMBUS_QUICK
+				      && size != I2C_SMBUS_I2C_BLOCK_DATA);
+	if (i) {
+		/* Compute PEC if first message is a write */
+		if (!(msg[0].flags & I2C_M_RD)) {
+			if (num == 1) /* Write only */
+				i2c_smbus_add_pec(&msg[0]);
+			else /* Write followed by read */
+				partial_pec = i2c_smbus_msg_pec(0, &msg[0]);
+		}
+		/* Ask for PEC if last message is a read */
+		if (msg[num-1].flags & I2C_M_RD)
+			msg[num-1].len++;
+	}
+
+	status = i2c_transfer(adapter, msg, num);
+	if (status < 0)
+		return status;
+
+	/* Check PEC if last message is a read */
+	if (i && (msg[num-1].flags & I2C_M_RD)) {
+		status = i2c_smbus_check_pec(partial_pec, &msg[num-1]);
+		if (status < 0)
+			return status;
+	}
+
+	if (read_write == I2C_SMBUS_READ)
+		switch (size) {
+		case I2C_SMBUS_BYTE:
+			data->byte = msgbuf0[0];
+			break;
+		case I2C_SMBUS_BYTE_DATA:
+			data->byte = msgbuf1[0];
+			break;
+		case I2C_SMBUS_WORD_DATA:
+		case I2C_SMBUS_PROC_CALL:
+			data->word = msgbuf1[0] | (msgbuf1[1] << 8);
+			break;
+		case I2C_SMBUS_I2C_BLOCK_DATA:
+			for (i = 0; i < data->block[0]; i++)
+				data->block[i+1] = msgbuf1[i];
+			break;
+		case I2C_SMBUS_BLOCK_DATA:
+		case I2C_SMBUS_BLOCK_PROC_CALL:
+			if (msgbuf1[0] > I2C_SMBUS_BLOCK_MAX)
+				return -EPROTO;	/* bogus length from device */
+			for (i = 0; i < msgbuf1[0] + 1; i++)
+				data->block[i] = msgbuf1[i];
+			break;
+		}
+	return 0;
+}
+
+/**
+ * i2c_smbus_xfer - execute SMBus protocol operations
+ * @adapter: Handle to I2C bus
+ * @addr: Address of SMBus slave on that bus
+ * @flags: I2C_CLIENT_* flags (usually zero or I2C_CLIENT_PEC)
+ * @read_write: I2C_SMBUS_READ or I2C_SMBUS_WRITE
+ * @command: Byte interpreted by slave, for protocols which use such bytes
+ * @protocol: SMBus protocol operation to execute, such as I2C_SMBUS_PROC_CALL
+ * @data: Data to be read or written
+ *
+ * This executes an SMBus protocol operation, and returns a negative
+ * errno code else zero on success.
+ */
+s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
+		   char read_write, u8 command, int protocol,
+		   union i2c_smbus_data *data)
+{
+	unsigned long orig_jiffies;
+	int try;
+	s32 res;
+
+	/* If enabled, the following two tracepoints are conditional on
+	 * read_write and protocol.
+	 */
+	trace_smbus_write(adapter, addr, flags, read_write,
+			  command, protocol, data);
+	trace_smbus_read(adapter, addr, flags, read_write,
+			 command, protocol);
+
+	flags &= I2C_M_TEN | I2C_CLIENT_PEC | I2C_CLIENT_SCCB;
+
+	if (adapter->algo->smbus_xfer) {
+		i2c_lock_bus(adapter, I2C_LOCK_SEGMENT);
+
+		/* Retry automatically on arbitration loss */
+		orig_jiffies = jiffies;
+		for (res = 0, try = 0; try <= adapter->retries; try++) {
+			res = adapter->algo->smbus_xfer(adapter, addr, flags,
+							read_write, command,
+							protocol, data);
+			if (res != -EAGAIN)
+				break;
+			if (time_after(jiffies,
+				       orig_jiffies + adapter->timeout))
+				break;
+		}
+		i2c_unlock_bus(adapter, I2C_LOCK_SEGMENT);
+
+		if (res != -EOPNOTSUPP || !adapter->algo->master_xfer)
+			goto trace;
+		/*
+		 * Fall back to i2c_smbus_xfer_emulated if the adapter doesn't
+		 * implement native support for the SMBus operation.
+		 */
+	}
+
+	res = i2c_smbus_xfer_emulated(adapter, addr, flags, read_write,
+				      command, protocol, data);
+
+trace:
+	/* If enabled, the reply tracepoint is conditional on read_write. */
+	trace_smbus_reply(adapter, addr, flags, read_write,
+			  command, protocol, data);
+	trace_smbus_result(adapter, addr, flags, read_write,
+			   command, protocol, res);
+
+	return res;
+}
+EXPORT_SYMBOL(i2c_smbus_xfer);
+
+/**
+ * i2c_smbus_read_i2c_block_data_or_emulated - read block or emulate
+ * @client: Handle to slave device
+ * @command: Byte interpreted by slave
+ * @length: Size of data block; SMBus allows at most I2C_SMBUS_BLOCK_MAX bytes
+ * @values: Byte array into which data will be read; big enough to hold
+ *	the data returned by the slave.  SMBus allows at most
+ *	I2C_SMBUS_BLOCK_MAX bytes.
+ *
+ * This executes the SMBus "block read" protocol if supported by the adapter.
+ * If block read is not supported, it emulates it using either word or byte
+ * read protocols depending on availability.
+ *
+ * The addresses of the I2C slave device that are accessed with this function
+ * must be mapped to a linear region, so that a block read will have the same
+ * effect as a byte read. Before using this function you must double-check
+ * if the I2C slave does support exchanging a block transfer with a byte
+ * transfer.
+ */
+s32 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client,
+					      u8 command, u8 length, u8 *values)
+{
+	u8 i = 0;
+	int status;
+
+	if (length > I2C_SMBUS_BLOCK_MAX)
+		length = I2C_SMBUS_BLOCK_MAX;
+
+	if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK))
+		return i2c_smbus_read_i2c_block_data(client, command, length, values);
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA))
+		return -EOPNOTSUPP;
+
+	if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_WORD_DATA)) {
+		while ((i + 2) <= length) {
+			status = i2c_smbus_read_word_data(client, command + i);
+			if (status < 0)
+				return status;
+			values[i] = status & 0xff;
+			values[i + 1] = status >> 8;
+			i += 2;
+		}
+	}
+
+	while (i < length) {
+		status = i2c_smbus_read_byte_data(client, command + i);
+		if (status < 0)
+			return status;
+		values[i] = status;
+		i++;
+	}
+
+	return i;
+}
+EXPORT_SYMBOL(i2c_smbus_read_i2c_block_data_or_emulated);

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
deleted file mode 100644
index 82576aac..0000000
--- a/drivers/i2c/i2c-core.c
+++ /dev/null

@@ -1,3831 +0,0 @@
-/* i2c-core.c - a device driver for the iic-bus interface		     */
-/* ------------------------------------------------------------------------- */
-/*   Copyright (C) 1995-99 Simon G. Vogl
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.			     */
-/* ------------------------------------------------------------------------- */
-
-/* With some changes from Kyösti Mälkki <kmalkki@cc.hut.fi>.
-   All SMBus-related things are written by Frodo Looijaard <frodol@dds.nl>
-   SMBus 2.0 support by Mark Studebaker <mdsxyz123@yahoo.com> and
-   Jean Delvare <jdelvare@suse.de>
-   Mux support by Rodolfo Giometti <giometti@enneenne.com> and
-   Michael Lawnick <michael.lawnick.ext@nsn.com>
-   OF support is copyright (c) 2008 Jochen Friedrich <jochen@scram.de>
-   (based on a previous patch from Jon Smirl <jonsmirl@gmail.com>) and
-   (c) 2013  Wolfram Sang <wsa@the-dreams.de>
-   I2C ACPI code Copyright (C) 2014 Intel Corp
-   Author: Lan Tianyu <tianyu.lan@intel.com>
-   I2C slave support (c) 2014 by Wolfram Sang <wsa@sang-engineering.com>
- */
-
-#define pr_fmt(fmt) "i2c-core: " fmt
-
-#include <dt-bindings/i2c/i2c.h>
-#include <linux/uaccess.h>
-#include <linux/acpi.h>
-#include <linux/clk/clk-conf.h>
-#include <linux/completion.h>
-#include <linux/delay.h>
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/gpio.h>
-#include <linux/hardirq.h>
-#include <linux/i2c.h>
-#include <linux/idr.h>
-#include <linux/init.h>
-#include <linux/irqflags.h>
-#include <linux/jump_label.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/of_device.h>
-#include <linux/of.h>
-#include <linux/of_irq.h>
-#include <linux/pm_domain.h>
-#include <linux/pm_runtime.h>
-#include <linux/pm_wakeirq.h>
-#include <linux/property.h>
-#include <linux/rwsem.h>
-#include <linux/slab.h>
-
-#include "i2c-core.h"
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/i2c.h>
-
-#define I2C_ADDR_OFFSET_TEN_BIT	0xa000
-#define I2C_ADDR_OFFSET_SLAVE	0x1000
-
-#define I2C_ADDR_7BITS_MAX	0x77
-#define I2C_ADDR_7BITS_COUNT	(I2C_ADDR_7BITS_MAX + 1)
-
-/* core_lock protects i2c_adapter_idr, and guarantees
-   that device detection, deletion of detected devices, and attach_adapter
-   calls are serialized */
-static DEFINE_MUTEX(core_lock);
-static DEFINE_IDR(i2c_adapter_idr);
-
-static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver);
-
-static struct static_key i2c_trace_msg = STATIC_KEY_INIT_FALSE;
-static bool is_registered;
-
-int i2c_transfer_trace_reg(void)
-{
-	static_key_slow_inc(&i2c_trace_msg);
-	return 0;
-}
-
-void i2c_transfer_trace_unreg(void)
-{
-	static_key_slow_dec(&i2c_trace_msg);
-}
-
-#if defined(CONFIG_ACPI)
-struct i2c_acpi_handler_data {
-	struct acpi_connection_info info;
-	struct i2c_adapter *adapter;
-};
-
-struct gsb_buffer {
-	u8	status;
-	u8	len;
-	union {
-		u16	wdata;
-		u8	bdata;
-		u8	data[0];
-	};
-} __packed;
-
-struct i2c_acpi_lookup {
-	struct i2c_board_info *info;
-	acpi_handle adapter_handle;
-	acpi_handle device_handle;
-	acpi_handle search_handle;
-	int n;
-	int index;
-	u32 speed;
-	u32 min_speed;
-};
-
-static int i2c_acpi_fill_info(struct acpi_resource *ares, void *data)
-{
-	struct i2c_acpi_lookup *lookup = data;
-	struct i2c_board_info *info = lookup->info;
-	struct acpi_resource_i2c_serialbus *sb;
-	acpi_status status;
-
-	if (info->addr || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
-		return 1;
-
-	sb = &ares->data.i2c_serial_bus;
-	if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C)
-		return 1;
-
-	if (lookup->index != -1 && lookup->n++ != lookup->index)
-		return 1;
-
-	status = acpi_get_handle(lookup->device_handle,
-				 sb->resource_source.string_ptr,
-				 &lookup->adapter_handle);
-	if (!ACPI_SUCCESS(status))
-		return 1;
-
-	info->addr = sb->slave_address;
-	lookup->speed = sb->connection_speed;
-	if (sb->access_mode == ACPI_I2C_10BIT_MODE)
-		info->flags |= I2C_CLIENT_TEN;
-
-	return 1;
-}
-
-static int i2c_acpi_do_lookup(struct acpi_device *adev,
-			      struct i2c_acpi_lookup *lookup)
-{
-	struct i2c_board_info *info = lookup->info;
-	struct list_head resource_list;
-	int ret;
-
-	if (acpi_bus_get_status(adev) || !adev->status.present ||
-	    acpi_device_enumerated(adev))
-		return -EINVAL;
-
-	memset(info, 0, sizeof(*info));
-	lookup->device_handle = acpi_device_handle(adev);
-
-	/* Look up for I2cSerialBus resource */
-	INIT_LIST_HEAD(&resource_list);
-	ret = acpi_dev_get_resources(adev, &resource_list,
-				     i2c_acpi_fill_info, lookup);
-	acpi_dev_free_resource_list(&resource_list);
-
-	if (ret < 0 || !info->addr)
-		return -EINVAL;
-
-	return 0;
-}
-
-static int i2c_acpi_get_info(struct acpi_device *adev,
-			     struct i2c_board_info *info,
-			     struct i2c_adapter *adapter,
-			     acpi_handle *adapter_handle)
-{
-	struct list_head resource_list;
-	struct resource_entry *entry;
-	struct i2c_acpi_lookup lookup;
-	int ret;
-
-	memset(&lookup, 0, sizeof(lookup));
-	lookup.info = info;
-	lookup.index = -1;
-
-	ret = i2c_acpi_do_lookup(adev, &lookup);
-	if (ret)
-		return ret;
-
-	if (adapter) {
-		/* The adapter must match the one in I2cSerialBus() connector */
-		if (ACPI_HANDLE(&adapter->dev) != lookup.adapter_handle)
-			return -ENODEV;
-	} else {
-		struct acpi_device *adapter_adev;
-
-		/* The adapter must be present */
-		if (acpi_bus_get_device(lookup.adapter_handle, &adapter_adev))
-			return -ENODEV;
-		if (acpi_bus_get_status(adapter_adev) ||
-		    !adapter_adev->status.present)
-			return -ENODEV;
-	}
-
-	info->fwnode = acpi_fwnode_handle(adev);
-	if (adapter_handle)
-		*adapter_handle = lookup.adapter_handle;
-
-	/* Then fill IRQ number if any */
-	INIT_LIST_HEAD(&resource_list);
-	ret = acpi_dev_get_resources(adev, &resource_list, NULL, NULL);
-	if (ret < 0)
-		return -EINVAL;
-
-	resource_list_for_each_entry(entry, &resource_list) {
-		if (resource_type(entry->res) == IORESOURCE_IRQ) {
-			info->irq = entry->res->start;
-			break;
-		}
-	}
-
-	acpi_dev_free_resource_list(&resource_list);
-
-	acpi_set_modalias(adev, dev_name(&adev->dev), info->type,
-			  sizeof(info->type));
-
-	return 0;
-}
-
-static void i2c_acpi_register_device(struct i2c_adapter *adapter,
-				     struct acpi_device *adev,
-				     struct i2c_board_info *info)
-{
-	adev->power.flags.ignore_parent = true;
-	acpi_device_set_enumerated(adev);
-
-	if (!i2c_new_device(adapter, info)) {
-		adev->power.flags.ignore_parent = false;
-		dev_err(&adapter->dev,
-			"failed to add I2C device %s from ACPI\n",
-			dev_name(&adev->dev));
-	}
-}
-
-static acpi_status i2c_acpi_add_device(acpi_handle handle, u32 level,
-				       void *data, void **return_value)
-{
-	struct i2c_adapter *adapter = data;
-	struct acpi_device *adev;
-	struct i2c_board_info info;
-
-	if (acpi_bus_get_device(handle, &adev))
-		return AE_OK;
-
-	if (i2c_acpi_get_info(adev, &info, adapter, NULL))
-		return AE_OK;
-
-	i2c_acpi_register_device(adapter, adev, &info);
-
-	return AE_OK;
-}
-
-#define I2C_ACPI_MAX_SCAN_DEPTH 32
-
-/**
- * i2c_acpi_register_devices - enumerate I2C slave devices behind adapter
- * @adap: pointer to adapter
- *
- * Enumerate all I2C slave devices behind this adapter by walking the ACPI
- * namespace. When a device is found it will be added to the Linux device
- * model and bound to the corresponding ACPI handle.
- */
-static void i2c_acpi_register_devices(struct i2c_adapter *adap)
-{
-	acpi_status status;
-
-	if (!has_acpi_companion(&adap->dev))
-		return;
-
-	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
-				     I2C_ACPI_MAX_SCAN_DEPTH,
-				     i2c_acpi_add_device, NULL,
-				     adap, NULL);
-	if (ACPI_FAILURE(status))
-		dev_warn(&adap->dev, "failed to enumerate I2C slaves\n");
-}
-
-static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level,
-					   void *data, void **return_value)
-{
-	struct i2c_acpi_lookup *lookup = data;
-	struct acpi_device *adev;
-
-	if (acpi_bus_get_device(handle, &adev))
-		return AE_OK;
-
-	if (i2c_acpi_do_lookup(adev, lookup))
-		return AE_OK;
-
-	if (lookup->search_handle != lookup->adapter_handle)
-		return AE_OK;
-
-	if (lookup->speed <= lookup->min_speed)
-		lookup->min_speed = lookup->speed;
-
-	return AE_OK;
-}
-
-/**
- * i2c_acpi_find_bus_speed - find I2C bus speed from ACPI
- * @dev: The device owning the bus
- *
- * Find the I2C bus speed by walking the ACPI namespace for all I2C slaves
- * devices connected to this bus and use the speed of slowest device.
- *
- * Returns the speed in Hz or zero
- */
-u32 i2c_acpi_find_bus_speed(struct device *dev)
-{
-	struct i2c_acpi_lookup lookup;
-	struct i2c_board_info dummy;
-	acpi_status status;
-
-	if (!has_acpi_companion(dev))
-		return 0;
-
-	memset(&lookup, 0, sizeof(lookup));
-	lookup.search_handle = ACPI_HANDLE(dev);
-	lookup.min_speed = UINT_MAX;
-	lookup.info = &dummy;
-	lookup.index = -1;
-
-	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
-				     I2C_ACPI_MAX_SCAN_DEPTH,
-				     i2c_acpi_lookup_speed, NULL,
-				     &lookup, NULL);
-
-	if (ACPI_FAILURE(status)) {
-		dev_warn(dev, "unable to find I2C bus speed from ACPI\n");
-		return 0;
-	}
-
-	return lookup.min_speed != UINT_MAX ? lookup.min_speed : 0;
-}
-EXPORT_SYMBOL_GPL(i2c_acpi_find_bus_speed);
-
-static int i2c_acpi_match_adapter(struct device *dev, void *data)
-{
-	struct i2c_adapter *adapter = i2c_verify_adapter(dev);
-
-	if (!adapter)
-		return 0;
-
-	return ACPI_HANDLE(dev) == (acpi_handle)data;
-}
-
-static int i2c_acpi_match_device(struct device *dev, void *data)
-{
-	return ACPI_COMPANION(dev) == data;
-}
-
-static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
-{
-	struct device *dev;
-
-	dev = bus_find_device(&i2c_bus_type, NULL, handle,
-			      i2c_acpi_match_adapter);
-	return dev ? i2c_verify_adapter(dev) : NULL;
-}
-
-static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev)
-{
-	struct device *dev;
-
-	dev = bus_find_device(&i2c_bus_type, NULL, adev, i2c_acpi_match_device);
-	return dev ? i2c_verify_client(dev) : NULL;
-}
-
-static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value,
-			   void *arg)
-{
-	struct acpi_device *adev = arg;
-	struct i2c_board_info info;
-	acpi_handle adapter_handle;
-	struct i2c_adapter *adapter;
-	struct i2c_client *client;
-
-	switch (value) {
-	case ACPI_RECONFIG_DEVICE_ADD:
-		if (i2c_acpi_get_info(adev, &info, NULL, &adapter_handle))
-			break;
-
-		adapter = i2c_acpi_find_adapter_by_handle(adapter_handle);
-		if (!adapter)
-			break;
-
-		i2c_acpi_register_device(adapter, adev, &info);
-		break;
-	case ACPI_RECONFIG_DEVICE_REMOVE:
-		if (!acpi_device_enumerated(adev))
-			break;
-
-		client = i2c_acpi_find_client_by_adev(adev);
-		if (!client)
-			break;
-
-		i2c_unregister_device(client);
-		put_device(&client->dev);
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block i2c_acpi_notifier = {
-	.notifier_call = i2c_acpi_notify,
-};
-
-/**
- * i2c_acpi_new_device - Create i2c-client for the Nth I2cSerialBus resource
- * @dev:     Device owning the ACPI resources to get the client from
- * @index:   Index of ACPI resource to get
- * @info:    describes the I2C device; note this is modified (addr gets set)
- * Context: can sleep
- *
- * By default the i2c subsys creates an i2c-client for the first I2cSerialBus
- * resource of an acpi_device, but some acpi_devices have multiple I2cSerialBus
- * resources, in that case this function can be used to create an i2c-client
- * for other I2cSerialBus resources in the Current Resource Settings table.
- *
- * Also see i2c_new_device, which this function calls to create the i2c-client.
- *
- * Returns a pointer to the new i2c-client, or NULL if the adapter is not found.
- */
-struct i2c_client *i2c_acpi_new_device(struct device *dev, int index,
-				       struct i2c_board_info *info)
-{
-	struct i2c_acpi_lookup lookup;
-	struct i2c_adapter *adapter;
-	struct acpi_device *adev;
-	LIST_HEAD(resource_list);
-	int ret;
-
-	adev = ACPI_COMPANION(dev);
-	if (!adev)
-		return NULL;
-
-	memset(&lookup, 0, sizeof(lookup));
-	lookup.info = info;
-	lookup.device_handle = acpi_device_handle(adev);
-	lookup.index = index;
-
-	ret = acpi_dev_get_resources(adev, &resource_list,
-				     i2c_acpi_fill_info, &lookup);
-	acpi_dev_free_resource_list(&resource_list);
-
-	if (ret < 0 || !info->addr)
-		return NULL;
-
-	adapter = i2c_acpi_find_adapter_by_handle(lookup.adapter_handle);
-	if (!adapter)
-		return NULL;
-
-	return i2c_new_device(adapter, info);
-}
-EXPORT_SYMBOL_GPL(i2c_acpi_new_device);
-#else /* CONFIG_ACPI */
-static inline void i2c_acpi_register_devices(struct i2c_adapter *adap) { }
-extern struct notifier_block i2c_acpi_notifier;
-#endif /* CONFIG_ACPI */
-
-#ifdef CONFIG_ACPI_I2C_OPREGION
-static int acpi_gsb_i2c_read_bytes(struct i2c_client *client,
-		u8 cmd, u8 *data, u8 data_len)
-{
-
-	struct i2c_msg msgs[2];
-	int ret;
-	u8 *buffer;
-
-	buffer = kzalloc(data_len, GFP_KERNEL);
-	if (!buffer)
-		return AE_NO_MEMORY;
-
-	msgs[0].addr = client->addr;
-	msgs[0].flags = client->flags;
-	msgs[0].len = 1;
-	msgs[0].buf = &cmd;
-
-	msgs[1].addr = client->addr;
-	msgs[1].flags = client->flags | I2C_M_RD;
-	msgs[1].len = data_len;
-	msgs[1].buf = buffer;
-
-	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
-	if (ret < 0)
-		dev_err(&client->adapter->dev, "i2c read failed\n");
-	else
-		memcpy(data, buffer, data_len);
-
-	kfree(buffer);
-	return ret;
-}
-
-static int acpi_gsb_i2c_write_bytes(struct i2c_client *client,
-		u8 cmd, u8 *data, u8 data_len)
-{
-
-	struct i2c_msg msgs[1];
-	u8 *buffer;
-	int ret = AE_OK;
-
-	buffer = kzalloc(data_len + 1, GFP_KERNEL);
-	if (!buffer)
-		return AE_NO_MEMORY;
-
-	buffer[0] = cmd;
-	memcpy(buffer + 1, data, data_len);
-
-	msgs[0].addr = client->addr;
-	msgs[0].flags = client->flags;
-	msgs[0].len = data_len + 1;
-	msgs[0].buf = buffer;
-
-	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
-	if (ret < 0)
-		dev_err(&client->adapter->dev, "i2c write failed\n");
-
-	kfree(buffer);
-	return ret;
-}
-
-static acpi_status
-i2c_acpi_space_handler(u32 function, acpi_physical_address command,
-			u32 bits, u64 *value64,
-			void *handler_context, void *region_context)
-{
-	struct gsb_buffer *gsb = (struct gsb_buffer *)value64;
-	struct i2c_acpi_handler_data *data = handler_context;
-	struct acpi_connection_info *info = &data->info;
-	struct acpi_resource_i2c_serialbus *sb;
-	struct i2c_adapter *adapter = data->adapter;
-	struct i2c_client *client;
-	struct acpi_resource *ares;
-	u32 accessor_type = function >> 16;
-	u8 action = function & ACPI_IO_MASK;
-	acpi_status ret;
-	int status;
-
-	ret = acpi_buffer_to_resource(info->connection, info->length, &ares);
-	if (ACPI_FAILURE(ret))
-		return ret;
-
-	client = kzalloc(sizeof(*client), GFP_KERNEL);
-	if (!client) {
-		ret = AE_NO_MEMORY;
-		goto err;
-	}
-
-	if (!value64 || ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS) {
-		ret = AE_BAD_PARAMETER;
-		goto err;
-	}
-
-	sb = &ares->data.i2c_serial_bus;
-	if (sb->type != ACPI_RESOURCE_SERIAL_TYPE_I2C) {
-		ret = AE_BAD_PARAMETER;
-		goto err;
-	}
-
-	client->adapter = adapter;
-	client->addr = sb->slave_address;
-
-	if (sb->access_mode == ACPI_I2C_10BIT_MODE)
-		client->flags |= I2C_CLIENT_TEN;
-
-	switch (accessor_type) {
-	case ACPI_GSB_ACCESS_ATTRIB_SEND_RCV:
-		if (action == ACPI_READ) {
-			status = i2c_smbus_read_byte(client);
-			if (status >= 0) {
-				gsb->bdata = status;
-				status = 0;
-			}
-		} else {
-			status = i2c_smbus_write_byte(client, gsb->bdata);
-		}
-		break;
-
-	case ACPI_GSB_ACCESS_ATTRIB_BYTE:
-		if (action == ACPI_READ) {
-			status = i2c_smbus_read_byte_data(client, command);
-			if (status >= 0) {
-				gsb->bdata = status;
-				status = 0;
-			}
-		} else {
-			status = i2c_smbus_write_byte_data(client, command,
-					gsb->bdata);
-		}
-		break;
-
-	case ACPI_GSB_ACCESS_ATTRIB_WORD:
-		if (action == ACPI_READ) {
-			status = i2c_smbus_read_word_data(client, command);
-			if (status >= 0) {
-				gsb->wdata = status;
-				status = 0;
-			}
-		} else {
-			status = i2c_smbus_write_word_data(client, command,
-					gsb->wdata);
-		}
-		break;
-
-	case ACPI_GSB_ACCESS_ATTRIB_BLOCK:
-		if (action == ACPI_READ) {
-			status = i2c_smbus_read_block_data(client, command,
-					gsb->data);
-			if (status >= 0) {
-				gsb->len = status;
-				status = 0;
-			}
-		} else {
-			status = i2c_smbus_write_block_data(client, command,
-					gsb->len, gsb->data);
-		}
-		break;
-
-	case ACPI_GSB_ACCESS_ATTRIB_MULTIBYTE:
-		if (action == ACPI_READ) {
-			status = acpi_gsb_i2c_read_bytes(client, command,
-					gsb->data, info->access_length);
-			if (status > 0)
-				status = 0;
-		} else {
-			status = acpi_gsb_i2c_write_bytes(client, command,
-					gsb->data, info->access_length);
-		}
-		break;
-
-	default:
-		dev_warn(&adapter->dev, "protocol 0x%02x not supported for client 0x%02x\n",
-			 accessor_type, client->addr);
-		ret = AE_BAD_PARAMETER;
-		goto err;
-	}
-
-	gsb->status = status;
-
- err:
-	kfree(client);
-	ACPI_FREE(ares);
-	return ret;
-}
-
-
-static int i2c_acpi_install_space_handler(struct i2c_adapter *adapter)
-{
-	acpi_handle handle;
-	struct i2c_acpi_handler_data *data;
-	acpi_status status;
-
-	if (!adapter->dev.parent)
-		return -ENODEV;
-
-	handle = ACPI_HANDLE(adapter->dev.parent);
-
-	if (!handle)
-		return -ENODEV;
-
-	data = kzalloc(sizeof(struct i2c_acpi_handler_data),
-			    GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	data->adapter = adapter;
-	status = acpi_bus_attach_private_data(handle, (void *)data);
-	if (ACPI_FAILURE(status)) {
-		kfree(data);
-		return -ENOMEM;
-	}
-
-	status = acpi_install_address_space_handler(handle,
-				ACPI_ADR_SPACE_GSBUS,
-				&i2c_acpi_space_handler,
-				NULL,
-				data);
-	if (ACPI_FAILURE(status)) {
-		dev_err(&adapter->dev, "Error installing i2c space handler\n");
-		acpi_bus_detach_private_data(handle);
-		kfree(data);
-		return -ENOMEM;
-	}
-
-	acpi_walk_dep_device_list(handle);
-	return 0;
-}
-
-static void i2c_acpi_remove_space_handler(struct i2c_adapter *adapter)
-{
-	acpi_handle handle;
-	struct i2c_acpi_handler_data *data;
-	acpi_status status;
-
-	if (!adapter->dev.parent)
-		return;
-
-	handle = ACPI_HANDLE(adapter->dev.parent);
-
-	if (!handle)
-		return;
-
-	acpi_remove_address_space_handler(handle,
-				ACPI_ADR_SPACE_GSBUS,
-				&i2c_acpi_space_handler);
-
-	status = acpi_bus_get_private_data(handle, (void **)&data);
-	if (ACPI_SUCCESS(status))
-		kfree(data);
-
-	acpi_bus_detach_private_data(handle);
-}
-#else /* CONFIG_ACPI_I2C_OPREGION */
-static inline void i2c_acpi_remove_space_handler(struct i2c_adapter *adapter)
-{ }
-
-static inline int i2c_acpi_install_space_handler(struct i2c_adapter *adapter)
-{ return 0; }
-#endif /* CONFIG_ACPI_I2C_OPREGION */
-
-/* ------------------------------------------------------------------------- */
-
-const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id,
-						const struct i2c_client *client)
-{
-	if (!(id && client))
-		return NULL;
-
-	while (id->name[0]) {
-		if (strcmp(client->name, id->name) == 0)
-			return id;
-		id++;
-	}
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(i2c_match_id);
-
-static int i2c_device_match(struct device *dev, struct device_driver *drv)
-{
-	struct i2c_client	*client = i2c_verify_client(dev);
-	struct i2c_driver	*driver;
-
-
-	/* Attempt an OF style match */
-	if (i2c_of_match_device(drv->of_match_table, client))
-		return 1;
-
-	/* Then ACPI style match */
-	if (acpi_driver_match_device(dev, drv))
-		return 1;
-
-	driver = to_i2c_driver(drv);
-
-	/* Finally an I2C match */
-	if (i2c_match_id(driver->id_table, client))
-		return 1;
-
-	return 0;
-}
-
-static int i2c_device_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
-	struct i2c_client *client = to_i2c_client(dev);
-	int rc;
-
-	rc = acpi_device_uevent_modalias(dev, env);
-	if (rc != -ENODEV)
-		return rc;
-
-	return add_uevent_var(env, "MODALIAS=%s%s", I2C_MODULE_PREFIX, client->name);
-}
-
-/* i2c bus recovery routines */
-static int get_scl_gpio_value(struct i2c_adapter *adap)
-{
-	return gpio_get_value(adap->bus_recovery_info->scl_gpio);
-}
-
-static void set_scl_gpio_value(struct i2c_adapter *adap, int val)
-{
-	gpio_set_value(adap->bus_recovery_info->scl_gpio, val);
-}
-
-static int get_sda_gpio_value(struct i2c_adapter *adap)
-{
-	return gpio_get_value(adap->bus_recovery_info->sda_gpio);
-}
-
-static int i2c_get_gpios_for_recovery(struct i2c_adapter *adap)
-{
-	struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
-	struct device *dev = &adap->dev;
-	int ret = 0;
-
-	ret = gpio_request_one(bri->scl_gpio, GPIOF_OPEN_DRAIN |
-			GPIOF_OUT_INIT_HIGH, "i2c-scl");
-	if (ret) {
-		dev_warn(dev, "Can't get SCL gpio: %d\n", bri->scl_gpio);
-		return ret;
-	}
-
-	if (bri->get_sda) {
-		if (gpio_request_one(bri->sda_gpio, GPIOF_IN, "i2c-sda")) {
-			/* work without SDA polling */
-			dev_warn(dev, "Can't get SDA gpio: %d. Not using SDA polling\n",
-					bri->sda_gpio);
-			bri->get_sda = NULL;
-		}
-	}
-
-	return ret;
-}
-
-static void i2c_put_gpios_for_recovery(struct i2c_adapter *adap)
-{
-	struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
-
-	if (bri->get_sda)
-		gpio_free(bri->sda_gpio);
-
-	gpio_free(bri->scl_gpio);
-}
-
-/*
- * We are generating clock pulses. ndelay() determines durating of clk pulses.
- * We will generate clock with rate 100 KHz and so duration of both clock levels
- * is: delay in ns = (10^6 / 100) / 2
- */
-#define RECOVERY_NDELAY		5000
-#define RECOVERY_CLK_CNT	9
-
-static int i2c_generic_recovery(struct i2c_adapter *adap)
-{
-	struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
-	int i = 0, val = 1, ret = 0;
-
-	if (bri->prepare_recovery)
-		bri->prepare_recovery(adap);
-
-	bri->set_scl(adap, val);
-	ndelay(RECOVERY_NDELAY);
-
-	/*
-	 * By this time SCL is high, as we need to give 9 falling-rising edges
-	 */
-	while (i++ < RECOVERY_CLK_CNT * 2) {
-		if (val) {
-			/* Break if SDA is high */
-			if (bri->get_sda && bri->get_sda(adap))
-					break;
-			/* SCL shouldn't be low here */
-			if (!bri->get_scl(adap)) {
-				dev_err(&adap->dev,
-					"SCL is stuck low, exit recovery\n");
-				ret = -EBUSY;
-				break;
-			}
-		}
-
-		val = !val;
-		bri->set_scl(adap, val);
-		ndelay(RECOVERY_NDELAY);
-	}
-
-	if (bri->unprepare_recovery)
-		bri->unprepare_recovery(adap);
-
-	return ret;
-}
-
-int i2c_generic_scl_recovery(struct i2c_adapter *adap)
-{
-	return i2c_generic_recovery(adap);
-}
-EXPORT_SYMBOL_GPL(i2c_generic_scl_recovery);
-
-int i2c_generic_gpio_recovery(struct i2c_adapter *adap)
-{
-	int ret;
-
-	ret = i2c_get_gpios_for_recovery(adap);
-	if (ret)
-		return ret;
-
-	ret = i2c_generic_recovery(adap);
-	i2c_put_gpios_for_recovery(adap);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(i2c_generic_gpio_recovery);
-
-int i2c_recover_bus(struct i2c_adapter *adap)
-{
-	if (!adap->bus_recovery_info)
-		return -EOPNOTSUPP;
-
-	dev_dbg(&adap->dev, "Trying i2c bus recovery\n");
-	return adap->bus_recovery_info->recover_bus(adap);
-}
-EXPORT_SYMBOL_GPL(i2c_recover_bus);
-
-static void i2c_init_recovery(struct i2c_adapter *adap)
-{
-	struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
-	char *err_str;
-
-	if (!bri)
-		return;
-
-	if (!bri->recover_bus) {
-		err_str = "no recover_bus() found";
-		goto err;
-	}
-
-	/* Generic GPIO recovery */
-	if (bri->recover_bus == i2c_generic_gpio_recovery) {
-		if (!gpio_is_valid(bri->scl_gpio)) {
-			err_str = "invalid SCL gpio";
-			goto err;
-		}
-
-		if (gpio_is_valid(bri->sda_gpio))
-			bri->get_sda = get_sda_gpio_value;
-		else
-			bri->get_sda = NULL;
-
-		bri->get_scl = get_scl_gpio_value;
-		bri->set_scl = set_scl_gpio_value;
-	} else if (bri->recover_bus == i2c_generic_scl_recovery) {
-		/* Generic SCL recovery */
-		if (!bri->set_scl || !bri->get_scl) {
-			err_str = "no {get|set}_scl() found";
-			goto err;
-		}
-	}
-
-	return;
- err:
-	dev_err(&adap->dev, "Not using recovery: %s\n", err_str);
-	adap->bus_recovery_info = NULL;
-}
-
-static int i2c_smbus_host_notify_to_irq(const struct i2c_client *client)
-{
-	struct i2c_adapter *adap = client->adapter;
-	unsigned int irq;
-
-	if (!adap->host_notify_domain)
-		return -ENXIO;
-
-	if (client->flags & I2C_CLIENT_TEN)
-		return -EINVAL;
-
-	irq = irq_find_mapping(adap->host_notify_domain, client->addr);
-	if (!irq)
-		irq = irq_create_mapping(adap->host_notify_domain,
-					 client->addr);
-
-	return irq > 0 ? irq : -ENXIO;
-}
-
-static int i2c_device_probe(struct device *dev)
-{
-	struct i2c_client	*client = i2c_verify_client(dev);
-	struct i2c_driver	*driver;
-	int status;
-
-	if (!client)
-		return 0;
-
-	driver = to_i2c_driver(dev->driver);
-
-	if (!client->irq && !driver->disable_i2c_core_irq_mapping) {
-		int irq = -ENOENT;
-
-		if (client->flags & I2C_CLIENT_HOST_NOTIFY) {
-			dev_dbg(dev, "Using Host Notify IRQ\n");
-			irq = i2c_smbus_host_notify_to_irq(client);
-		} else if (dev->of_node) {
-			irq = of_irq_get_byname(dev->of_node, "irq");
-			if (irq == -EINVAL || irq == -ENODATA)
-				irq = of_irq_get(dev->of_node, 0);
-		} else if (ACPI_COMPANION(dev)) {
-			irq = acpi_dev_gpio_irq_get(ACPI_COMPANION(dev), 0);
-		}
-		if (irq == -EPROBE_DEFER)
-			return irq;
-
-		if (irq < 0)
-			irq = 0;
-
-		client->irq = irq;
-	}
-
-	/*
-	 * An I2C ID table is not mandatory, if and only if, a suitable Device
-	 * Tree match table entry is supplied for the probing device.
-	 */
-	if (!driver->id_table &&
-	    !i2c_of_match_device(dev->driver->of_match_table, client))
-		return -ENODEV;
-
-	if (client->flags & I2C_CLIENT_WAKE) {
-		int wakeirq = -ENOENT;
-
-		if (dev->of_node) {
-			wakeirq = of_irq_get_byname(dev->of_node, "wakeup");
-			if (wakeirq == -EPROBE_DEFER)
-				return wakeirq;
-		}
-
-		device_init_wakeup(&client->dev, true);
-
-		if (wakeirq > 0 && wakeirq != client->irq)
-			status = dev_pm_set_dedicated_wake_irq(dev, wakeirq);
-		else if (client->irq > 0)
-			status = dev_pm_set_wake_irq(dev, client->irq);
-		else
-			status = 0;
-
-		if (status)
-			dev_warn(&client->dev, "failed to set up wakeup irq\n");
-	}
-
-	dev_dbg(dev, "probe\n");
-
-	status = of_clk_set_defaults(dev->of_node, false);
-	if (status < 0)
-		goto err_clear_wakeup_irq;
-
-	status = dev_pm_domain_attach(&client->dev, true);
-	if (status == -EPROBE_DEFER)
-		goto err_clear_wakeup_irq;
-
-	/*
-	 * When there are no more users of probe(),
-	 * rename probe_new to probe.
-	 */
-	if (driver->probe_new)
-		status = driver->probe_new(client);
-	else if (driver->probe)
-		status = driver->probe(client,
-				       i2c_match_id(driver->id_table, client));
-	else
-		status = -EINVAL;
-
-	if (status)
-		goto err_detach_pm_domain;
-
-	return 0;
-
-err_detach_pm_domain:
-	dev_pm_domain_detach(&client->dev, true);
-err_clear_wakeup_irq:
-	dev_pm_clear_wake_irq(&client->dev);
-	device_init_wakeup(&client->dev, false);
-	return status;
-}
-
-static int i2c_device_remove(struct device *dev)
-{
-	struct i2c_client	*client = i2c_verify_client(dev);
-	struct i2c_driver	*driver;
-	int status = 0;
-
-	if (!client || !dev->driver)
-		return 0;
-
-	driver = to_i2c_driver(dev->driver);
-	if (driver->remove) {
-		dev_dbg(dev, "remove\n");
-		status = driver->remove(client);
-	}
-
-	dev_pm_domain_detach(&client->dev, true);
-
-	dev_pm_clear_wake_irq(&client->dev);
-	device_init_wakeup(&client->dev, false);
-
-	return status;
-}
-
-static void i2c_device_shutdown(struct device *dev)
-{
-	struct i2c_client *client = i2c_verify_client(dev);
-	struct i2c_driver *driver;
-
-	if (!client || !dev->driver)
-		return;
-	driver = to_i2c_driver(dev->driver);
-	if (driver->shutdown)
-		driver->shutdown(client);
-}
-
-static void i2c_client_dev_release(struct device *dev)
-{
-	kfree(to_i2c_client(dev));
-}
-
-static ssize_t
-show_name(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%s\n", dev->type == &i2c_client_type ?
-		       to_i2c_client(dev)->name : to_i2c_adapter(dev)->name);
-}
-static DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
-
-static ssize_t
-show_modalias(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct i2c_client *client = to_i2c_client(dev);
-	int len;
-
-	len = acpi_device_modalias(dev, buf, PAGE_SIZE -1);
-	if (len != -ENODEV)
-		return len;
-
-	return sprintf(buf, "%s%s\n", I2C_MODULE_PREFIX, client->name);
-}
-static DEVICE_ATTR(modalias, S_IRUGO, show_modalias, NULL);
-
-static struct attribute *i2c_dev_attrs[] = {
-	&dev_attr_name.attr,
-	/* modalias helps coldplug:  modprobe $(cat .../modalias) */
-	&dev_attr_modalias.attr,
-	NULL
-};
-ATTRIBUTE_GROUPS(i2c_dev);
-
-struct bus_type i2c_bus_type = {
-	.name		= "i2c",
-	.match		= i2c_device_match,
-	.probe		= i2c_device_probe,
-	.remove		= i2c_device_remove,
-	.shutdown	= i2c_device_shutdown,
-};
-EXPORT_SYMBOL_GPL(i2c_bus_type);
-
-struct device_type i2c_client_type = {
-	.groups		= i2c_dev_groups,
-	.uevent		= i2c_device_uevent,
-	.release	= i2c_client_dev_release,
-};
-EXPORT_SYMBOL_GPL(i2c_client_type);
-
-
-/**
- * i2c_verify_client - return parameter as i2c_client, or NULL
- * @dev: device, probably from some driver model iterator
- *
- * When traversing the driver model tree, perhaps using driver model
- * iterators like @device_for_each_child(), you can't assume very much
- * about the nodes you find.  Use this function to avoid oopses caused
- * by wrongly treating some non-I2C device as an i2c_client.
- */
-struct i2c_client *i2c_verify_client(struct device *dev)
-{
-	return (dev->type == &i2c_client_type)
-			? to_i2c_client(dev)
-			: NULL;
-}
-EXPORT_SYMBOL(i2c_verify_client);
-
-
-/* Return a unique address which takes the flags of the client into account */
-static unsigned short i2c_encode_flags_to_addr(struct i2c_client *client)
-{
-	unsigned short addr = client->addr;
-
-	/* For some client flags, add an arbitrary offset to avoid collisions */
-	if (client->flags & I2C_CLIENT_TEN)
-		addr |= I2C_ADDR_OFFSET_TEN_BIT;
-
-	if (client->flags & I2C_CLIENT_SLAVE)
-		addr |= I2C_ADDR_OFFSET_SLAVE;
-
-	return addr;
-}
-
-/* This is a permissive address validity check, I2C address map constraints
- * are purposely not enforced, except for the general call address. */
-static int i2c_check_addr_validity(unsigned addr, unsigned short flags)
-{
-	if (flags & I2C_CLIENT_TEN) {
-		/* 10-bit address, all values are valid */
-		if (addr > 0x3ff)
-			return -EINVAL;
-	} else {
-		/* 7-bit address, reject the general call address */
-		if (addr == 0x00 || addr > 0x7f)
-			return -EINVAL;
-	}
-	return 0;
-}
-
-/* And this is a strict address validity check, used when probing. If a
- * device uses a reserved address, then it shouldn't be probed. 7-bit
- * addressing is assumed, 10-bit address devices are rare and should be
- * explicitly enumerated. */
-static int i2c_check_7bit_addr_validity_strict(unsigned short addr)
-{
-	/*
-	 * Reserved addresses per I2C specification:
-	 *  0x00       General call address / START byte
-	 *  0x01       CBUS address
-	 *  0x02       Reserved for different bus format
-	 *  0x03       Reserved for future purposes
-	 *  0x04-0x07  Hs-mode master code
-	 *  0x78-0x7b  10-bit slave addressing
-	 *  0x7c-0x7f  Reserved for future purposes
-	 */
-	if (addr < 0x08 || addr > 0x77)
-		return -EINVAL;
-	return 0;
-}
-
-static int __i2c_check_addr_busy(struct device *dev, void *addrp)
-{
-	struct i2c_client	*client = i2c_verify_client(dev);
-	int			addr = *(int *)addrp;
-
-	if (client && i2c_encode_flags_to_addr(client) == addr)
-		return -EBUSY;
-	return 0;
-}
-
-/* walk up mux tree */
-static int i2c_check_mux_parents(struct i2c_adapter *adapter, int addr)
-{
-	struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter);
-	int result;
-
-	result = device_for_each_child(&adapter->dev, &addr,
-					__i2c_check_addr_busy);
-
-	if (!result && parent)
-		result = i2c_check_mux_parents(parent, addr);
-
-	return result;
-}
-
-/* recurse down mux tree */
-static int i2c_check_mux_children(struct device *dev, void *addrp)
-{
-	int result;
-
-	if (dev->type == &i2c_adapter_type)
-		result = device_for_each_child(dev, addrp,
-						i2c_check_mux_children);
-	else
-		result = __i2c_check_addr_busy(dev, addrp);
-
-	return result;
-}
-
-static int i2c_check_addr_busy(struct i2c_adapter *adapter, int addr)
-{
-	struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter);
-	int result = 0;
-
-	if (parent)
-		result = i2c_check_mux_parents(parent, addr);
-
-	if (!result)
-		result = device_for_each_child(&adapter->dev, &addr,
-						i2c_check_mux_children);
-
-	return result;
-}
-
-/**
- * i2c_adapter_lock_bus - Get exclusive access to an I2C bus segment
- * @adapter: Target I2C bus segment
- * @flags: I2C_LOCK_ROOT_ADAPTER locks the root i2c adapter, I2C_LOCK_SEGMENT
- *	locks only this branch in the adapter tree
- */
-static void i2c_adapter_lock_bus(struct i2c_adapter *adapter,
-				 unsigned int flags)
-{
-	rt_mutex_lock(&adapter->bus_lock);
-}
-
-/**
- * i2c_adapter_trylock_bus - Try to get exclusive access to an I2C bus segment
- * @adapter: Target I2C bus segment
- * @flags: I2C_LOCK_ROOT_ADAPTER trylocks the root i2c adapter, I2C_LOCK_SEGMENT
- *	trylocks only this branch in the adapter tree
- */
-static int i2c_adapter_trylock_bus(struct i2c_adapter *adapter,
-				   unsigned int flags)
-{
-	return rt_mutex_trylock(&adapter->bus_lock);
-}
-
-/**
- * i2c_adapter_unlock_bus - Release exclusive access to an I2C bus segment
- * @adapter: Target I2C bus segment
- * @flags: I2C_LOCK_ROOT_ADAPTER unlocks the root i2c adapter, I2C_LOCK_SEGMENT
- *	unlocks only this branch in the adapter tree
- */
-static void i2c_adapter_unlock_bus(struct i2c_adapter *adapter,
-				   unsigned int flags)
-{
-	rt_mutex_unlock(&adapter->bus_lock);
-}
-
-static void i2c_dev_set_name(struct i2c_adapter *adap,
-			     struct i2c_client *client)
-{
-	struct acpi_device *adev = ACPI_COMPANION(&client->dev);
-
-	if (adev) {
-		dev_set_name(&client->dev, "i2c-%s", acpi_dev_name(adev));
-		return;
-	}
-
-	dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
-		     i2c_encode_flags_to_addr(client));
-}
-
-static int i2c_dev_irq_from_resources(const struct resource *resources,
-				      unsigned int num_resources)
-{
-	struct irq_data *irqd;
-	int i;
-
-	for (i = 0; i < num_resources; i++) {
-		const struct resource *r = &resources[i];
-
-		if (resource_type(r) != IORESOURCE_IRQ)
-			continue;
-
-		if (r->flags & IORESOURCE_BITS) {
-			irqd = irq_get_irq_data(r->start);
-			if (!irqd)
-				break;
-
-			irqd_set_trigger_type(irqd, r->flags & IORESOURCE_BITS);
-		}
-
-		return r->start;
-	}
-
-	return 0;
-}
-
-/**
- * i2c_new_device - instantiate an i2c device
- * @adap: the adapter managing the device
- * @info: describes one I2C device; bus_num is ignored
- * Context: can sleep
- *
- * Create an i2c device. Binding is handled through driver model
- * probe()/remove() methods.  A driver may be bound to this device when we
- * return from this function, or any later moment (e.g. maybe hotplugging will
- * load the driver module).  This call is not appropriate for use by mainboard
- * initialization logic, which usually runs during an arch_initcall() long
- * before any i2c_adapter could exist.
- *
- * This returns the new i2c client, which may be saved for later use with
- * i2c_unregister_device(); or NULL to indicate an error.
- */
-struct i2c_client *
-i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
-{
-	struct i2c_client	*client;
-	int			status;
-
-	client = kzalloc(sizeof *client, GFP_KERNEL);
-	if (!client)
-		return NULL;
-
-	client->adapter = adap;
-
-	client->dev.platform_data = info->platform_data;
-
-	if (info->archdata)
-		client->dev.archdata = *info->archdata;
-
-	client->flags = info->flags;
-	client->addr = info->addr;
-
-	client->irq = info->irq;
-	if (!client->irq)
-		client->irq = i2c_dev_irq_from_resources(info->resources,
-							 info->num_resources);
-
-	strlcpy(client->name, info->type, sizeof(client->name));
-
-	status = i2c_check_addr_validity(client->addr, client->flags);
-	if (status) {
-		dev_err(&adap->dev, "Invalid %d-bit I2C address 0x%02hx\n",
-			client->flags & I2C_CLIENT_TEN ? 10 : 7, client->addr);
-		goto out_err_silent;
-	}
-
-	/* Check for address business */
-	status = i2c_check_addr_busy(adap, i2c_encode_flags_to_addr(client));
-	if (status)
-		goto out_err;
-
-	client->dev.parent = &client->adapter->dev;
-	client->dev.bus = &i2c_bus_type;
-	client->dev.type = &i2c_client_type;
-	client->dev.of_node = info->of_node;
-	client->dev.fwnode = info->fwnode;
-
-	i2c_dev_set_name(adap, client);
-
-	if (info->properties) {
-		status = device_add_properties(&client->dev, info->properties);
-		if (status) {
-			dev_err(&adap->dev,
-				"Failed to add properties to client %s: %d\n",
-				client->name, status);
-			goto out_err;
-		}
-	}
-
-	status = device_register(&client->dev);
-	if (status)
-		goto out_free_props;
-
-	dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n",
-		client->name, dev_name(&client->dev));
-
-	return client;
-
-out_free_props:
-	if (info->properties)
-		device_remove_properties(&client->dev);
-out_err:
-	dev_err(&adap->dev,
-		"Failed to register i2c client %s at 0x%02x (%d)\n",
-		client->name, client->addr, status);
-out_err_silent:
-	kfree(client);
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(i2c_new_device);
-
-
-/**
- * i2c_unregister_device - reverse effect of i2c_new_device()
- * @client: value returned from i2c_new_device()
- * Context: can sleep
- */
-void i2c_unregister_device(struct i2c_client *client)
-{
-	if (client->dev.of_node)
-		of_node_clear_flag(client->dev.of_node, OF_POPULATED);
-	if (ACPI_COMPANION(&client->dev))
-		acpi_device_clear_enumerated(ACPI_COMPANION(&client->dev));
-	device_unregister(&client->dev);
-}
-EXPORT_SYMBOL_GPL(i2c_unregister_device);
-
-
-static const struct i2c_device_id dummy_id[] = {
-	{ "dummy", 0 },
-	{ },
-};
-
-static int dummy_probe(struct i2c_client *client,
-		       const struct i2c_device_id *id)
-{
-	return 0;
-}
-
-static int dummy_remove(struct i2c_client *client)
-{
-	return 0;
-}
-
-static struct i2c_driver dummy_driver = {
-	.driver.name	= "dummy",
-	.probe		= dummy_probe,
-	.remove		= dummy_remove,
-	.id_table	= dummy_id,
-};
-
-/**
- * i2c_new_dummy - return a new i2c device bound to a dummy driver
- * @adapter: the adapter managing the device
- * @address: seven bit address to be used
- * Context: can sleep
- *
- * This returns an I2C client bound to the "dummy" driver, intended for use
- * with devices that consume multiple addresses.  Examples of such chips
- * include various EEPROMS (like 24c04 and 24c08 models).
- *
- * These dummy devices have two main uses.  First, most I2C and SMBus calls
- * except i2c_transfer() need a client handle; the dummy will be that handle.
- * And second, this prevents the specified address from being bound to a
- * different driver.
- *
- * This returns the new i2c client, which should be saved for later use with
- * i2c_unregister_device(); or NULL to indicate an error.
- */
-struct i2c_client *i2c_new_dummy(struct i2c_adapter *adapter, u16 address)
-{
-	struct i2c_board_info info = {
-		I2C_BOARD_INFO("dummy", address),
-	};
-
-	return i2c_new_device(adapter, &info);
-}
-EXPORT_SYMBOL_GPL(i2c_new_dummy);
-
-/**
- * i2c_new_secondary_device - Helper to get the instantiated secondary address
- * and create the associated device
- * @client: Handle to the primary client
- * @name: Handle to specify which secondary address to get
- * @default_addr: Used as a fallback if no secondary address was specified
- * Context: can sleep
- *
- * I2C clients can be composed of multiple I2C slaves bound together in a single
- * component. The I2C client driver then binds to the master I2C slave and needs
- * to create I2C dummy clients to communicate with all the other slaves.
- *
- * This function creates and returns an I2C dummy client whose I2C address is
- * retrieved from the platform firmware based on the given slave name. If no
- * address is specified by the firmware default_addr is used.
- *
- * On DT-based platforms the address is retrieved from the "reg" property entry
- * cell whose "reg-names" value matches the slave name.
- *
- * This returns the new i2c client, which should be saved for later use with
- * i2c_unregister_device(); or NULL to indicate an error.
- */
-struct i2c_client *i2c_new_secondary_device(struct i2c_client *client,
-						const char *name,
-						u16 default_addr)
-{
-	struct device_node *np = client->dev.of_node;
-	u32 addr = default_addr;
-	int i;
-
-	if (np) {
-		i = of_property_match_string(np, "reg-names", name);
-		if (i >= 0)
-			of_property_read_u32_index(np, "reg", i, &addr);
-	}
-
-	dev_dbg(&client->adapter->dev, "Address for %s : 0x%x\n", name, addr);
-	return i2c_new_dummy(client->adapter, addr);
-}
-EXPORT_SYMBOL_GPL(i2c_new_secondary_device);
-
-/* ------------------------------------------------------------------------- */
-
-/* I2C bus adapters -- one roots each I2C or SMBUS segment */
-
-static void i2c_adapter_dev_release(struct device *dev)
-{
-	struct i2c_adapter *adap = to_i2c_adapter(dev);
-	complete(&adap->dev_released);
-}
-
-unsigned int i2c_adapter_depth(struct i2c_adapter *adapter)
-{
-	unsigned int depth = 0;
-
-	while ((adapter = i2c_parent_is_i2c_adapter(adapter)))
-		depth++;
-
-	WARN_ONCE(depth >= MAX_LOCKDEP_SUBCLASSES,
-		  "adapter depth exceeds lockdep subclass limit\n");
-
-	return depth;
-}
-EXPORT_SYMBOL_GPL(i2c_adapter_depth);
-
-/*
- * Let users instantiate I2C devices through sysfs. This can be used when
- * platform initialization code doesn't contain the proper data for
- * whatever reason. Also useful for drivers that do device detection and
- * detection fails, either because the device uses an unexpected address,
- * or this is a compatible device with different ID register values.
- *
- * Parameter checking may look overzealous, but we really don't want
- * the user to provide incorrect parameters.
- */
-static ssize_t
-i2c_sysfs_new_device(struct device *dev, struct device_attribute *attr,
-		     const char *buf, size_t count)
-{
-	struct i2c_adapter *adap = to_i2c_adapter(dev);
-	struct i2c_board_info info;
-	struct i2c_client *client;
-	char *blank, end;
-	int res;
-
-	memset(&info, 0, sizeof(struct i2c_board_info));
-
-	blank = strchr(buf, ' ');
-	if (!blank) {
-		dev_err(dev, "%s: Missing parameters\n", "new_device");
-		return -EINVAL;
-	}
-	if (blank - buf > I2C_NAME_SIZE - 1) {
-		dev_err(dev, "%s: Invalid device name\n", "new_device");
-		return -EINVAL;
-	}
-	memcpy(info.type, buf, blank - buf);
-
-	/* Parse remaining parameters, reject extra parameters */
-	res = sscanf(++blank, "%hi%c", &info.addr, &end);
-	if (res < 1) {
-		dev_err(dev, "%s: Can't parse I2C address\n", "new_device");
-		return -EINVAL;
-	}
-	if (res > 1  && end != '\n') {
-		dev_err(dev, "%s: Extra parameters\n", "new_device");
-		return -EINVAL;
-	}
-
-	if ((info.addr & I2C_ADDR_OFFSET_TEN_BIT) == I2C_ADDR_OFFSET_TEN_BIT) {
-		info.addr &= ~I2C_ADDR_OFFSET_TEN_BIT;
-		info.flags |= I2C_CLIENT_TEN;
-	}
-
-	if (info.addr & I2C_ADDR_OFFSET_SLAVE) {
-		info.addr &= ~I2C_ADDR_OFFSET_SLAVE;
-		info.flags |= I2C_CLIENT_SLAVE;
-	}
-
-	client = i2c_new_device(adap, &info);
-	if (!client)
-		return -EINVAL;
-
-	/* Keep track of the added device */
-	mutex_lock(&adap->userspace_clients_lock);
-	list_add_tail(&client->detected, &adap->userspace_clients);
-	mutex_unlock(&adap->userspace_clients_lock);
-	dev_info(dev, "%s: Instantiated device %s at 0x%02hx\n", "new_device",
-		 info.type, info.addr);
-
-	return count;
-}
-static DEVICE_ATTR(new_device, S_IWUSR, NULL, i2c_sysfs_new_device);
-
-/*
- * And of course let the users delete the devices they instantiated, if
- * they got it wrong. This interface can only be used to delete devices
- * instantiated by i2c_sysfs_new_device above. This guarantees that we
- * don't delete devices to which some kernel code still has references.
- *
- * Parameter checking may look overzealous, but we really don't want
- * the user to delete the wrong device.
- */
-static ssize_t
-i2c_sysfs_delete_device(struct device *dev, struct device_attribute *attr,
-			const char *buf, size_t count)
-{
-	struct i2c_adapter *adap = to_i2c_adapter(dev);
-	struct i2c_client *client, *next;
-	unsigned short addr;
-	char end;
-	int res;
-
-	/* Parse parameters, reject extra parameters */
-	res = sscanf(buf, "%hi%c", &addr, &end);
-	if (res < 1) {
-		dev_err(dev, "%s: Can't parse I2C address\n", "delete_device");
-		return -EINVAL;
-	}
-	if (res > 1  && end != '\n') {
-		dev_err(dev, "%s: Extra parameters\n", "delete_device");
-		return -EINVAL;
-	}
-
-	/* Make sure the device was added through sysfs */
-	res = -ENOENT;
-	mutex_lock_nested(&adap->userspace_clients_lock,
-			  i2c_adapter_depth(adap));
-	list_for_each_entry_safe(client, next, &adap->userspace_clients,
-				 detected) {
-		if (i2c_encode_flags_to_addr(client) == addr) {
-			dev_info(dev, "%s: Deleting device %s at 0x%02hx\n",
-				 "delete_device", client->name, client->addr);
-
-			list_del(&client->detected);
-			i2c_unregister_device(client);
-			res = count;
-			break;
-		}
-	}
-	mutex_unlock(&adap->userspace_clients_lock);
-
-	if (res < 0)
-		dev_err(dev, "%s: Can't find device in list\n",
-			"delete_device");
-	return res;
-}
-static DEVICE_ATTR_IGNORE_LOCKDEP(delete_device, S_IWUSR, NULL,
-				   i2c_sysfs_delete_device);
-
-static struct attribute *i2c_adapter_attrs[] = {
-	&dev_attr_name.attr,
-	&dev_attr_new_device.attr,
-	&dev_attr_delete_device.attr,
-	NULL
-};
-ATTRIBUTE_GROUPS(i2c_adapter);
-
-struct device_type i2c_adapter_type = {
-	.groups		= i2c_adapter_groups,
-	.release	= i2c_adapter_dev_release,
-};
-EXPORT_SYMBOL_GPL(i2c_adapter_type);
-
-/**
- * i2c_verify_adapter - return parameter as i2c_adapter or NULL
- * @dev: device, probably from some driver model iterator
- *
- * When traversing the driver model tree, perhaps using driver model
- * iterators like @device_for_each_child(), you can't assume very much
- * about the nodes you find.  Use this function to avoid oopses caused
- * by wrongly treating some non-I2C device as an i2c_adapter.
- */
-struct i2c_adapter *i2c_verify_adapter(struct device *dev)
-{
-	return (dev->type == &i2c_adapter_type)
-			? to_i2c_adapter(dev)
-			: NULL;
-}
-EXPORT_SYMBOL(i2c_verify_adapter);
-
-#ifdef CONFIG_I2C_COMPAT
-static struct class_compat *i2c_adapter_compat_class;
-#endif
-
-static void i2c_scan_static_board_info(struct i2c_adapter *adapter)
-{
-	struct i2c_devinfo	*devinfo;
-
-	down_read(&__i2c_board_lock);
-	list_for_each_entry(devinfo, &__i2c_board_list, list) {
-		if (devinfo->busnum == adapter->nr
-				&& !i2c_new_device(adapter,
-						&devinfo->board_info))
-			dev_err(&adapter->dev,
-				"Can't create device at 0x%02x\n",
-				devinfo->board_info.addr);
-	}
-	up_read(&__i2c_board_lock);
-}
-
-/* OF support code */
-
-#if IS_ENABLED(CONFIG_OF)
-static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap,
-						 struct device_node *node)
-{
-	struct i2c_client *result;
-	struct i2c_board_info info = {};
-	struct dev_archdata dev_ad = {};
-	const __be32 *addr_be;
-	u32 addr;
-	int len;
-
-	dev_dbg(&adap->dev, "of_i2c: register %s\n", node->full_name);
-
-	if (of_modalias_node(node, info.type, sizeof(info.type)) < 0) {
-		dev_err(&adap->dev, "of_i2c: modalias failure on %s\n",
-			node->full_name);
-		return ERR_PTR(-EINVAL);
-	}
-
-	addr_be = of_get_property(node, "reg", &len);
-	if (!addr_be || (len < sizeof(*addr_be))) {
-		dev_err(&adap->dev, "of_i2c: invalid reg on %s\n",
-			node->full_name);
-		return ERR_PTR(-EINVAL);
-	}
-
-	addr = be32_to_cpup(addr_be);
-	if (addr & I2C_TEN_BIT_ADDRESS) {
-		addr &= ~I2C_TEN_BIT_ADDRESS;
-		info.flags |= I2C_CLIENT_TEN;
-	}
-
-	if (addr & I2C_OWN_SLAVE_ADDRESS) {
-		addr &= ~I2C_OWN_SLAVE_ADDRESS;
-		info.flags |= I2C_CLIENT_SLAVE;
-	}
-
-	if (i2c_check_addr_validity(addr, info.flags)) {
-		dev_err(&adap->dev, "of_i2c: invalid addr=%x on %s\n",
-			addr, node->full_name);
-		return ERR_PTR(-EINVAL);
-	}
-
-	info.addr = addr;
-	info.of_node = of_node_get(node);
-	info.archdata = &dev_ad;
-
-	if (of_property_read_bool(node, "host-notify"))
-		info.flags |= I2C_CLIENT_HOST_NOTIFY;
-
-	if (of_get_property(node, "wakeup-source", NULL))
-		info.flags |= I2C_CLIENT_WAKE;
-
-	result = i2c_new_device(adap, &info);
-	if (result == NULL) {
-		dev_err(&adap->dev, "of_i2c: Failure registering %s\n",
-			node->full_name);
-		of_node_put(node);
-		return ERR_PTR(-EINVAL);
-	}
-	return result;
-}
-
-static void of_i2c_register_devices(struct i2c_adapter *adap)
-{
-	struct device_node *bus, *node;
-	struct i2c_client *client;
-
-	/* Only register child devices if the adapter has a node pointer set */
-	if (!adap->dev.of_node)
-		return;
-
-	dev_dbg(&adap->dev, "of_i2c: walking child nodes\n");
-
-	bus = of_get_child_by_name(adap->dev.of_node, "i2c-bus");
-	if (!bus)
-		bus = of_node_get(adap->dev.of_node);
-
-	for_each_available_child_of_node(bus, node) {
-		if (of_node_test_and_set_flag(node, OF_POPULATED))
-			continue;
-
-		client = of_i2c_register_device(adap, node);
-		if (IS_ERR(client)) {
-			dev_warn(&adap->dev,
-				 "Failed to create I2C device for %s\n",
-				 node->full_name);
-			of_node_clear_flag(node, OF_POPULATED);
-		}
-	}
-
-	of_node_put(bus);
-}
-
-static int of_dev_node_match(struct device *dev, void *data)
-{
-	return dev->of_node == data;
-}
-
-/* must call put_device() when done with returned i2c_client device */
-struct i2c_client *of_find_i2c_device_by_node(struct device_node *node)
-{
-	struct device *dev;
-	struct i2c_client *client;
-
-	dev = bus_find_device(&i2c_bus_type, NULL, node, of_dev_node_match);
-	if (!dev)
-		return NULL;
-
-	client = i2c_verify_client(dev);
-	if (!client)
-		put_device(dev);
-
-	return client;
-}
-EXPORT_SYMBOL(of_find_i2c_device_by_node);
-
-/* must call put_device() when done with returned i2c_adapter device */
-struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node)
-{
-	struct device *dev;
-	struct i2c_adapter *adapter;
-
-	dev = bus_find_device(&i2c_bus_type, NULL, node, of_dev_node_match);
-	if (!dev)
-		return NULL;
-
-	adapter = i2c_verify_adapter(dev);
-	if (!adapter)
-		put_device(dev);
-
-	return adapter;
-}
-EXPORT_SYMBOL(of_find_i2c_adapter_by_node);
-
-/* must call i2c_put_adapter() when done with returned i2c_adapter device */
-struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node)
-{
-	struct i2c_adapter *adapter;
-
-	adapter = of_find_i2c_adapter_by_node(node);
-	if (!adapter)
-		return NULL;
-
-	if (!try_module_get(adapter->owner)) {
-		put_device(&adapter->dev);
-		adapter = NULL;
-	}
-
-	return adapter;
-}
-EXPORT_SYMBOL(of_get_i2c_adapter_by_node);
-
-static const struct of_device_id*
-i2c_of_match_device_sysfs(const struct of_device_id *matches,
-				  struct i2c_client *client)
-{
-	const char *name;
-
-	for (; matches->compatible[0]; matches++) {
-		/*
-		 * Adding devices through the i2c sysfs interface provides us
-		 * a string to match which may be compatible with the device
-		 * tree compatible strings, however with no actual of_node the
-		 * of_match_device() will not match
-		 */
-		if (sysfs_streq(client->name, matches->compatible))
-			return matches;
-
-		name = strchr(matches->compatible, ',');
-		if (!name)
-			name = matches->compatible;
-		else
-			name++;
-
-		if (sysfs_streq(client->name, name))
-			return matches;
-	}
-
-	return NULL;
-}
-
-const struct of_device_id
-*i2c_of_match_device(const struct of_device_id *matches,
-		     struct i2c_client *client)
-{
-	const struct of_device_id *match;
-
-	if (!(client && matches))
-		return NULL;
-
-	match = of_match_device(matches, &client->dev);
-	if (match)
-		return match;
-
-	return i2c_of_match_device_sysfs(matches, client);
-}
-EXPORT_SYMBOL_GPL(i2c_of_match_device);
-#else
-static void of_i2c_register_devices(struct i2c_adapter *adap) { }
-#endif /* CONFIG_OF */
-
-static int i2c_do_add_adapter(struct i2c_driver *driver,
-			      struct i2c_adapter *adap)
-{
-	/* Detect supported devices on that bus, and instantiate them */
-	i2c_detect(adap, driver);
-
-	/* Let legacy drivers scan this bus for matching devices */
-	if (driver->attach_adapter) {
-		dev_warn(&adap->dev, "%s: attach_adapter method is deprecated\n",
-			 driver->driver.name);
-		dev_warn(&adap->dev,
-			 "Please use another way to instantiate your i2c_client\n");
-		/* We ignore the return code; if it fails, too bad */
-		driver->attach_adapter(adap);
-	}
-	return 0;
-}
-
-static int __process_new_adapter(struct device_driver *d, void *data)
-{
-	return i2c_do_add_adapter(to_i2c_driver(d), data);
-}
-
-static const struct i2c_lock_operations i2c_adapter_lock_ops = {
-	.lock_bus =    i2c_adapter_lock_bus,
-	.trylock_bus = i2c_adapter_trylock_bus,
-	.unlock_bus =  i2c_adapter_unlock_bus,
-};
-
-static void i2c_host_notify_irq_teardown(struct i2c_adapter *adap)
-{
-	struct irq_domain *domain = adap->host_notify_domain;
-	irq_hw_number_t hwirq;
-
-	if (!domain)
-		return;
-
-	for (hwirq = 0 ; hwirq < I2C_ADDR_7BITS_COUNT ; hwirq++)
-		irq_dispose_mapping(irq_find_mapping(domain, hwirq));
-
-	irq_domain_remove(domain);
-	adap->host_notify_domain = NULL;
-}
-
-static int i2c_host_notify_irq_map(struct irq_domain *h,
-					  unsigned int virq,
-					  irq_hw_number_t hw_irq_num)
-{
-	irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq);
-
-	return 0;
-}
-
-static const struct irq_domain_ops i2c_host_notify_irq_ops = {
-	.map = i2c_host_notify_irq_map,
-};
-
-static int i2c_setup_host_notify_irq_domain(struct i2c_adapter *adap)
-{
-	struct irq_domain *domain;
-
-	if (!i2c_check_functionality(adap, I2C_FUNC_SMBUS_HOST_NOTIFY))
-		return 0;
-
-	domain = irq_domain_create_linear(adap->dev.fwnode,
-					  I2C_ADDR_7BITS_COUNT,
-					  &i2c_host_notify_irq_ops, adap);
-	if (!domain)
-		return -ENOMEM;
-
-	adap->host_notify_domain = domain;
-
-	return 0;
-}
-
-/**
- * i2c_handle_smbus_host_notify - Forward a Host Notify event to the correct
- * I2C client.
- * @adap: the adapter
- * @addr: the I2C address of the notifying device
- * Context: can't sleep
- *
- * Helper function to be called from an I2C bus driver's interrupt
- * handler. It will schedule the Host Notify IRQ.
- */
-int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr)
-{
-	int irq;
-
-	if (!adap)
-		return -EINVAL;
-
-	irq = irq_find_mapping(adap->host_notify_domain, addr);
-	if (irq <= 0)
-		return -ENXIO;
-
-	generic_handle_irq(irq);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(i2c_handle_smbus_host_notify);
-
-static int i2c_register_adapter(struct i2c_adapter *adap)
-{
-	int res = -EINVAL;
-
-	/* Can't register until after driver model init */
-	if (WARN_ON(!is_registered)) {
-		res = -EAGAIN;
-		goto out_list;
-	}
-
-	/* Sanity checks */
-	if (WARN(!adap->name[0], "i2c adapter has no name"))
-		goto out_list;
-
-	if (!adap->algo) {
-		pr_err("adapter '%s': no algo supplied!\n", adap->name);
-		goto out_list;
-	}
-
-	if (!adap->lock_ops)
-		adap->lock_ops = &i2c_adapter_lock_ops;
-
-	rt_mutex_init(&adap->bus_lock);
-	rt_mutex_init(&adap->mux_lock);
-	mutex_init(&adap->userspace_clients_lock);
-	INIT_LIST_HEAD(&adap->userspace_clients);
-
-	/* Set default timeout to 1 second if not already set */
-	if (adap->timeout == 0)
-		adap->timeout = HZ;
-
-	/* register soft irqs for Host Notify */
-	res = i2c_setup_host_notify_irq_domain(adap);
-	if (res) {
-		pr_err("adapter '%s': can't create Host Notify IRQs (%d)\n",
-		       adap->name, res);
-		goto out_list;
-	}
-
-	dev_set_name(&adap->dev, "i2c-%d", adap->nr);
-	adap->dev.bus = &i2c_bus_type;
-	adap->dev.type = &i2c_adapter_type;
-	res = device_register(&adap->dev);
-	if (res) {
-		pr_err("adapter '%s': can't register device (%d)\n", adap->name, res);
-		goto out_list;
-	}
-
-	dev_dbg(&adap->dev, "adapter [%s] registered\n", adap->name);
-
-	pm_runtime_no_callbacks(&adap->dev);
-	pm_suspend_ignore_children(&adap->dev, true);
-	pm_runtime_enable(&adap->dev);
-
-#ifdef CONFIG_I2C_COMPAT
-	res = class_compat_create_link(i2c_adapter_compat_class, &adap->dev,
-				       adap->dev.parent);
-	if (res)
-		dev_warn(&adap->dev,
-			 "Failed to create compatibility class link\n");
-#endif
-
-	i2c_init_recovery(adap);
-
-	/* create pre-declared device nodes */
-	of_i2c_register_devices(adap);
-	i2c_acpi_register_devices(adap);
-	i2c_acpi_install_space_handler(adap);
-
-	if (adap->nr < __i2c_first_dynamic_bus_num)
-		i2c_scan_static_board_info(adap);
-
-	/* Notify drivers */
-	mutex_lock(&core_lock);
-	bus_for_each_drv(&i2c_bus_type, NULL, adap, __process_new_adapter);
-	mutex_unlock(&core_lock);
-
-	return 0;
-
-out_list:
-	mutex_lock(&core_lock);
-	idr_remove(&i2c_adapter_idr, adap->nr);
-	mutex_unlock(&core_lock);
-	return res;
-}
-
-/**
- * __i2c_add_numbered_adapter - i2c_add_numbered_adapter where nr is never -1
- * @adap: the adapter to register (with adap->nr initialized)
- * Context: can sleep
- *
- * See i2c_add_numbered_adapter() for details.
- */
-static int __i2c_add_numbered_adapter(struct i2c_adapter *adap)
-{
-	int id;
-
-	mutex_lock(&core_lock);
-	id = idr_alloc(&i2c_adapter_idr, adap, adap->nr, adap->nr + 1, GFP_KERNEL);
-	mutex_unlock(&core_lock);
-	if (WARN(id < 0, "couldn't get idr"))
-		return id == -ENOSPC ? -EBUSY : id;
-
-	return i2c_register_adapter(adap);
-}
-
-/**
- * i2c_add_adapter - declare i2c adapter, use dynamic bus number
- * @adapter: the adapter to add
- * Context: can sleep
- *
- * This routine is used to declare an I2C adapter when its bus number
- * doesn't matter or when its bus number is specified by an dt alias.
- * Examples of bases when the bus number doesn't matter: I2C adapters
- * dynamically added by USB links or PCI plugin cards.
- *
- * When this returns zero, a new bus number was allocated and stored
- * in adap->nr, and the specified adapter became available for clients.
- * Otherwise, a negative errno value is returned.
- */
-int i2c_add_adapter(struct i2c_adapter *adapter)
-{
-	struct device *dev = &adapter->dev;
-	int id;
-
-	if (dev->of_node) {
-		id = of_alias_get_id(dev->of_node, "i2c");
-		if (id >= 0) {
-			adapter->nr = id;
-			return __i2c_add_numbered_adapter(adapter);
-		}
-	}
-
-	mutex_lock(&core_lock);
-	id = idr_alloc(&i2c_adapter_idr, adapter,
-		       __i2c_first_dynamic_bus_num, 0, GFP_KERNEL);
-	mutex_unlock(&core_lock);
-	if (WARN(id < 0, "couldn't get idr"))
-		return id;
-
-	adapter->nr = id;
-
-	return i2c_register_adapter(adapter);
-}
-EXPORT_SYMBOL(i2c_add_adapter);
-
-/**
- * i2c_add_numbered_adapter - declare i2c adapter, use static bus number
- * @adap: the adapter to register (with adap->nr initialized)
- * Context: can sleep
- *
- * This routine is used to declare an I2C adapter when its bus number
- * matters.  For example, use it for I2C adapters from system-on-chip CPUs,
- * or otherwise built in to the system's mainboard, and where i2c_board_info
- * is used to properly configure I2C devices.
- *
- * If the requested bus number is set to -1, then this function will behave
- * identically to i2c_add_adapter, and will dynamically assign a bus number.
- *
- * If no devices have pre-been declared for this bus, then be sure to
- * register the adapter before any dynamically allocated ones.  Otherwise
- * the required bus ID may not be available.
- *
- * When this returns zero, the specified adapter became available for
- * clients using the bus number provided in adap->nr.  Also, the table
- * of I2C devices pre-declared using i2c_register_board_info() is scanned,
- * and the appropriate driver model device nodes are created.  Otherwise, a
- * negative errno value is returned.
- */
-int i2c_add_numbered_adapter(struct i2c_adapter *adap)
-{
-	if (adap->nr == -1) /* -1 means dynamically assign bus id */
-		return i2c_add_adapter(adap);
-
-	return __i2c_add_numbered_adapter(adap);
-}
-EXPORT_SYMBOL_GPL(i2c_add_numbered_adapter);
-
-static void i2c_do_del_adapter(struct i2c_driver *driver,
-			      struct i2c_adapter *adapter)
-{
-	struct i2c_client *client, *_n;
-
-	/* Remove the devices we created ourselves as the result of hardware
-	 * probing (using a driver's detect method) */
-	list_for_each_entry_safe(client, _n, &driver->clients, detected) {
-		if (client->adapter == adapter) {
-			dev_dbg(&adapter->dev, "Removing %s at 0x%x\n",
-				client->name, client->addr);
-			list_del(&client->detected);
-			i2c_unregister_device(client);
-		}
-	}
-}
-
-static int __unregister_client(struct device *dev, void *dummy)
-{
-	struct i2c_client *client = i2c_verify_client(dev);
-	if (client && strcmp(client->name, "dummy"))
-		i2c_unregister_device(client);
-	return 0;
-}
-
-static int __unregister_dummy(struct device *dev, void *dummy)
-{
-	struct i2c_client *client = i2c_verify_client(dev);
-	if (client)
-		i2c_unregister_device(client);
-	return 0;
-}
-
-static int __process_removed_adapter(struct device_driver *d, void *data)
-{
-	i2c_do_del_adapter(to_i2c_driver(d), data);
-	return 0;
-}
-
-/**
- * i2c_del_adapter - unregister I2C adapter
- * @adap: the adapter being unregistered
- * Context: can sleep
- *
- * This unregisters an I2C adapter which was previously registered
- * by @i2c_add_adapter or @i2c_add_numbered_adapter.
- */
-void i2c_del_adapter(struct i2c_adapter *adap)
-{
-	struct i2c_adapter *found;
-	struct i2c_client *client, *next;
-
-	/* First make sure that this adapter was ever added */
-	mutex_lock(&core_lock);
-	found = idr_find(&i2c_adapter_idr, adap->nr);
-	mutex_unlock(&core_lock);
-	if (found != adap) {
-		pr_debug("attempting to delete unregistered adapter [%s]\n", adap->name);
-		return;
-	}
-
-	i2c_acpi_remove_space_handler(adap);
-	/* Tell drivers about this removal */
-	mutex_lock(&core_lock);
-	bus_for_each_drv(&i2c_bus_type, NULL, adap,
-			       __process_removed_adapter);
-	mutex_unlock(&core_lock);
-
-	/* Remove devices instantiated from sysfs */
-	mutex_lock_nested(&adap->userspace_clients_lock,
-			  i2c_adapter_depth(adap));
-	list_for_each_entry_safe(client, next, &adap->userspace_clients,
-				 detected) {
-		dev_dbg(&adap->dev, "Removing %s at 0x%x\n", client->name,
-			client->addr);
-		list_del(&client->detected);
-		i2c_unregister_device(client);
-	}
-	mutex_unlock(&adap->userspace_clients_lock);
-
-	/* Detach any active clients. This can't fail, thus we do not
-	 * check the returned value. This is a two-pass process, because
-	 * we can't remove the dummy devices during the first pass: they
-	 * could have been instantiated by real devices wishing to clean
-	 * them up properly, so we give them a chance to do that first. */
-	device_for_each_child(&adap->dev, NULL, __unregister_client);
-	device_for_each_child(&adap->dev, NULL, __unregister_dummy);
-
-#ifdef CONFIG_I2C_COMPAT
-	class_compat_remove_link(i2c_adapter_compat_class, &adap->dev,
-				 adap->dev.parent);
-#endif
-
-	/* device name is gone after device_unregister */
-	dev_dbg(&adap->dev, "adapter [%s] unregistered\n", adap->name);
-
-	pm_runtime_disable(&adap->dev);
-
-	i2c_host_notify_irq_teardown(adap);
-
-	/* wait until all references to the device are gone
-	 *
-	 * FIXME: This is old code and should ideally be replaced by an
-	 * alternative which results in decoupling the lifetime of the struct
-	 * device from the i2c_adapter, like spi or netdev do. Any solution
-	 * should be thoroughly tested with DEBUG_KOBJECT_RELEASE enabled!
-	 */
-	init_completion(&adap->dev_released);
-	device_unregister(&adap->dev);
-	wait_for_completion(&adap->dev_released);
-
-	/* free bus id */
-	mutex_lock(&core_lock);
-	idr_remove(&i2c_adapter_idr, adap->nr);
-	mutex_unlock(&core_lock);
-
-	/* Clear the device structure in case this adapter is ever going to be
-	   added again */
-	memset(&adap->dev, 0, sizeof(adap->dev));
-}
-EXPORT_SYMBOL(i2c_del_adapter);
-
-/**
- * i2c_parse_fw_timings - get I2C related timing parameters from firmware
- * @dev: The device to scan for I2C timing properties
- * @t: the i2c_timings struct to be filled with values
- * @use_defaults: bool to use sane defaults derived from the I2C specification
- *		  when properties are not found, otherwise use 0
- *
- * Scan the device for the generic I2C properties describing timing parameters
- * for the signal and fill the given struct with the results. If a property was
- * not found and use_defaults was true, then maximum timings are assumed which
- * are derived from the I2C specification. If use_defaults is not used, the
- * results will be 0, so drivers can apply their own defaults later. The latter
- * is mainly intended for avoiding regressions of existing drivers which want
- * to switch to this function. New drivers almost always should use the defaults.
- */
-
-void i2c_parse_fw_timings(struct device *dev, struct i2c_timings *t, bool use_defaults)
-{
-	int ret;
-
-	memset(t, 0, sizeof(*t));
-
-	ret = device_property_read_u32(dev, "clock-frequency", &t->bus_freq_hz);
-	if (ret && use_defaults)
-		t->bus_freq_hz = 100000;
-
-	ret = device_property_read_u32(dev, "i2c-scl-rising-time-ns", &t->scl_rise_ns);
-	if (ret && use_defaults) {
-		if (t->bus_freq_hz <= 100000)
-			t->scl_rise_ns = 1000;
-		else if (t->bus_freq_hz <= 400000)
-			t->scl_rise_ns = 300;
-		else
-			t->scl_rise_ns = 120;
-	}
-
-	ret = device_property_read_u32(dev, "i2c-scl-falling-time-ns", &t->scl_fall_ns);
-	if (ret && use_defaults) {
-		if (t->bus_freq_hz <= 400000)
-			t->scl_fall_ns = 300;
-		else
-			t->scl_fall_ns = 120;
-	}
-
-	device_property_read_u32(dev, "i2c-scl-internal-delay-ns", &t->scl_int_delay_ns);
-
-	ret = device_property_read_u32(dev, "i2c-sda-falling-time-ns", &t->sda_fall_ns);
-	if (ret && use_defaults)
-		t->sda_fall_ns = t->scl_fall_ns;
-}
-EXPORT_SYMBOL_GPL(i2c_parse_fw_timings);
-
-/* ------------------------------------------------------------------------- */
-
-int i2c_for_each_dev(void *data, int (*fn)(struct device *, void *))
-{
-	int res;
-
-	mutex_lock(&core_lock);
-	res = bus_for_each_dev(&i2c_bus_type, NULL, data, fn);
-	mutex_unlock(&core_lock);
-
-	return res;
-}
-EXPORT_SYMBOL_GPL(i2c_for_each_dev);
-
-static int __process_new_driver(struct device *dev, void *data)
-{
-	if (dev->type != &i2c_adapter_type)
-		return 0;
-	return i2c_do_add_adapter(data, to_i2c_adapter(dev));
-}
-
-/*
- * An i2c_driver is used with one or more i2c_client (device) nodes to access
- * i2c slave chips, on a bus instance associated with some i2c_adapter.
- */
-
-int i2c_register_driver(struct module *owner, struct i2c_driver *driver)
-{
-	int res;
-
-	/* Can't register until after driver model init */
-	if (WARN_ON(!is_registered))
-		return -EAGAIN;
-
-	/* add the driver to the list of i2c drivers in the driver core */
-	driver->driver.owner = owner;
-	driver->driver.bus = &i2c_bus_type;
-	INIT_LIST_HEAD(&driver->clients);
-
-	/* When registration returns, the driver core
-	 * will have called probe() for all matching-but-unbound devices.
-	 */
-	res = driver_register(&driver->driver);
-	if (res)
-		return res;
-
-	pr_debug("driver [%s] registered\n", driver->driver.name);
-
-	/* Walk the adapters that are already present */
-	i2c_for_each_dev(driver, __process_new_driver);
-
-	return 0;
-}
-EXPORT_SYMBOL(i2c_register_driver);
-
-static int __process_removed_driver(struct device *dev, void *data)
-{
-	if (dev->type == &i2c_adapter_type)
-		i2c_do_del_adapter(data, to_i2c_adapter(dev));
-	return 0;
-}
-
-/**
- * i2c_del_driver - unregister I2C driver
- * @driver: the driver being unregistered
- * Context: can sleep
- */
-void i2c_del_driver(struct i2c_driver *driver)
-{
-	i2c_for_each_dev(driver, __process_removed_driver);
-
-	driver_unregister(&driver->driver);
-	pr_debug("driver [%s] unregistered\n", driver->driver.name);
-}
-EXPORT_SYMBOL(i2c_del_driver);
-
-/* ------------------------------------------------------------------------- */
-
-/**
- * i2c_use_client - increments the reference count of the i2c client structure
- * @client: the client being referenced
- *
- * Each live reference to a client should be refcounted. The driver model does
- * that automatically as part of driver binding, so that most drivers don't
- * need to do this explicitly: they hold a reference until they're unbound
- * from the device.
- *
- * A pointer to the client with the incremented reference counter is returned.
- */
-struct i2c_client *i2c_use_client(struct i2c_client *client)
-{
-	if (client && get_device(&client->dev))
-		return client;
-	return NULL;
-}
-EXPORT_SYMBOL(i2c_use_client);
-
-/**
- * i2c_release_client - release a use of the i2c client structure
- * @client: the client being no longer referenced
- *
- * Must be called when a user of a client is finished with it.
- */
-void i2c_release_client(struct i2c_client *client)
-{
-	if (client)
-		put_device(&client->dev);
-}
-EXPORT_SYMBOL(i2c_release_client);
-
-struct i2c_cmd_arg {
-	unsigned	cmd;
-	void		*arg;
-};
-
-static int i2c_cmd(struct device *dev, void *_arg)
-{
-	struct i2c_client	*client = i2c_verify_client(dev);
-	struct i2c_cmd_arg	*arg = _arg;
-	struct i2c_driver	*driver;
-
-	if (!client || !client->dev.driver)
-		return 0;
-
-	driver = to_i2c_driver(client->dev.driver);
-	if (driver->command)
-		driver->command(client, arg->cmd, arg->arg);
-	return 0;
-}
-
-void i2c_clients_command(struct i2c_adapter *adap, unsigned int cmd, void *arg)
-{
-	struct i2c_cmd_arg	cmd_arg;
-
-	cmd_arg.cmd = cmd;
-	cmd_arg.arg = arg;
-	device_for_each_child(&adap->dev, &cmd_arg, i2c_cmd);
-}
-EXPORT_SYMBOL(i2c_clients_command);
-
-#if IS_ENABLED(CONFIG_OF_DYNAMIC)
-static int of_i2c_notify(struct notifier_block *nb, unsigned long action,
-			 void *arg)
-{
-	struct of_reconfig_data *rd = arg;
-	struct i2c_adapter *adap;
-	struct i2c_client *client;
-
-	switch (of_reconfig_get_state_change(action, rd)) {
-	case OF_RECONFIG_CHANGE_ADD:
-		adap = of_find_i2c_adapter_by_node(rd->dn->parent);
-		if (adap == NULL)
-			return NOTIFY_OK;	/* not for us */
-
-		if (of_node_test_and_set_flag(rd->dn, OF_POPULATED)) {
-			put_device(&adap->dev);
-			return NOTIFY_OK;
-		}
-
-		client = of_i2c_register_device(adap, rd->dn);
-		put_device(&adap->dev);
-
-		if (IS_ERR(client)) {
-			dev_err(&adap->dev, "failed to create client for '%s'\n",
-				 rd->dn->full_name);
-			of_node_clear_flag(rd->dn, OF_POPULATED);
-			return notifier_from_errno(PTR_ERR(client));
-		}
-		break;
-	case OF_RECONFIG_CHANGE_REMOVE:
-		/* already depopulated? */
-		if (!of_node_check_flag(rd->dn, OF_POPULATED))
-			return NOTIFY_OK;
-
-		/* find our device by node */
-		client = of_find_i2c_device_by_node(rd->dn);
-		if (client == NULL)
-			return NOTIFY_OK;	/* no? not meant for us */
-
-		/* unregister takes one ref away */
-		i2c_unregister_device(client);
-
-		/* and put the reference of the find */
-		put_device(&client->dev);
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-static struct notifier_block i2c_of_notifier = {
-	.notifier_call = of_i2c_notify,
-};
-#else
-extern struct notifier_block i2c_of_notifier;
-#endif /* CONFIG_OF_DYNAMIC */
-
-static int __init i2c_init(void)
-{
-	int retval;
-
-	retval = of_alias_get_highest_id("i2c");
-
-	down_write(&__i2c_board_lock);
-	if (retval >= __i2c_first_dynamic_bus_num)
-		__i2c_first_dynamic_bus_num = retval + 1;
-	up_write(&__i2c_board_lock);
-
-	retval = bus_register(&i2c_bus_type);
-	if (retval)
-		return retval;
-
-	is_registered = true;
-
-#ifdef CONFIG_I2C_COMPAT
-	i2c_adapter_compat_class = class_compat_register("i2c-adapter");
-	if (!i2c_adapter_compat_class) {
-		retval = -ENOMEM;
-		goto bus_err;
-	}
-#endif
-	retval = i2c_add_driver(&dummy_driver);
-	if (retval)
-		goto class_err;
-
-	if (IS_ENABLED(CONFIG_OF_DYNAMIC))
-		WARN_ON(of_reconfig_notifier_register(&i2c_of_notifier));
-	if (IS_ENABLED(CONFIG_ACPI))
-		WARN_ON(acpi_reconfig_notifier_register(&i2c_acpi_notifier));
-
-	return 0;
-
-class_err:
-#ifdef CONFIG_I2C_COMPAT
-	class_compat_unregister(i2c_adapter_compat_class);
-bus_err:
-#endif
-	is_registered = false;
-	bus_unregister(&i2c_bus_type);
-	return retval;
-}
-
-static void __exit i2c_exit(void)
-{
-	if (IS_ENABLED(CONFIG_ACPI))
-		WARN_ON(acpi_reconfig_notifier_unregister(&i2c_acpi_notifier));
-	if (IS_ENABLED(CONFIG_OF_DYNAMIC))
-		WARN_ON(of_reconfig_notifier_unregister(&i2c_of_notifier));
-	i2c_del_driver(&dummy_driver);
-#ifdef CONFIG_I2C_COMPAT
-	class_compat_unregister(i2c_adapter_compat_class);
-#endif
-	bus_unregister(&i2c_bus_type);
-	tracepoint_synchronize_unregister();
-}
-
-/* We must initialize early, because some subsystems register i2c drivers
- * in subsys_initcall() code, but are linked (and initialized) before i2c.
- */
-postcore_initcall(i2c_init);
-module_exit(i2c_exit);
-
-/* ----------------------------------------------------
- * the functional interface to the i2c busses.
- * ----------------------------------------------------
- */
-
-/* Check if val is exceeding the quirk IFF quirk is non 0 */
-#define i2c_quirk_exceeded(val, quirk) ((quirk) && ((val) > (quirk)))
-
-static int i2c_quirk_error(struct i2c_adapter *adap, struct i2c_msg *msg, char *err_msg)
-{
-	dev_err_ratelimited(&adap->dev, "adapter quirk: %s (addr 0x%04x, size %u, %s)\n",
-			    err_msg, msg->addr, msg->len,
-			    msg->flags & I2C_M_RD ? "read" : "write");
-	return -EOPNOTSUPP;
-}
-
-static int i2c_check_for_quirks(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
-{
-	const struct i2c_adapter_quirks *q = adap->quirks;
-	int max_num = q->max_num_msgs, i;
-	bool do_len_check = true;
-
-	if (q->flags & I2C_AQ_COMB) {
-		max_num = 2;
-
-		/* special checks for combined messages */
-		if (num == 2) {
-			if (q->flags & I2C_AQ_COMB_WRITE_FIRST && msgs[0].flags & I2C_M_RD)
-				return i2c_quirk_error(adap, &msgs[0], "1st comb msg must be write");
-
-			if (q->flags & I2C_AQ_COMB_READ_SECOND && !(msgs[1].flags & I2C_M_RD))
-				return i2c_quirk_error(adap, &msgs[1], "2nd comb msg must be read");
-
-			if (q->flags & I2C_AQ_COMB_SAME_ADDR && msgs[0].addr != msgs[1].addr)
-				return i2c_quirk_error(adap, &msgs[0], "comb msg only to same addr");
-
-			if (i2c_quirk_exceeded(msgs[0].len, q->max_comb_1st_msg_len))
-				return i2c_quirk_error(adap, &msgs[0], "msg too long");
-
-			if (i2c_quirk_exceeded(msgs[1].len, q->max_comb_2nd_msg_len))
-				return i2c_quirk_error(adap, &msgs[1], "msg too long");
-
-			do_len_check = false;
-		}
-	}
-
-	if (i2c_quirk_exceeded(num, max_num))
-		return i2c_quirk_error(adap, &msgs[0], "too many messages");
-
-	for (i = 0; i < num; i++) {
-		u16 len = msgs[i].len;
-
-		if (msgs[i].flags & I2C_M_RD) {
-			if (do_len_check && i2c_quirk_exceeded(len, q->max_read_len))
-				return i2c_quirk_error(adap, &msgs[i], "msg too long");
-		} else {
-			if (do_len_check && i2c_quirk_exceeded(len, q->max_write_len))
-				return i2c_quirk_error(adap, &msgs[i], "msg too long");
-		}
-	}
-
-	return 0;
-}
-
-/**
- * __i2c_transfer - unlocked flavor of i2c_transfer
- * @adap: Handle to I2C bus
- * @msgs: One or more messages to execute before STOP is issued to
- *	terminate the operation; each message begins with a START.
- * @num: Number of messages to be executed.
- *
- * Returns negative errno, else the number of messages executed.
- *
- * Adapter lock must be held when calling this function. No debug logging
- * takes place. adap->algo->master_xfer existence isn't checked.
- */
-int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
-{
-	unsigned long orig_jiffies;
-	int ret, try;
-
-	if (adap->quirks && i2c_check_for_quirks(adap, msgs, num))
-		return -EOPNOTSUPP;
-
-	/* i2c_trace_msg gets enabled when tracepoint i2c_transfer gets
-	 * enabled.  This is an efficient way of keeping the for-loop from
-	 * being executed when not needed.
-	 */
-	if (static_key_false(&i2c_trace_msg)) {
-		int i;
-		for (i = 0; i < num; i++)
-			if (msgs[i].flags & I2C_M_RD)
-				trace_i2c_read(adap, &msgs[i], i);
-			else
-				trace_i2c_write(adap, &msgs[i], i);
-	}
-
-	/* Retry automatically on arbitration loss */
-	orig_jiffies = jiffies;
-	for (ret = 0, try = 0; try <= adap->retries; try++) {
-		ret = adap->algo->master_xfer(adap, msgs, num);
-		if (ret != -EAGAIN)
-			break;
-		if (time_after(jiffies, orig_jiffies + adap->timeout))
-			break;
-	}
-
-	if (static_key_false(&i2c_trace_msg)) {
-		int i;
-		for (i = 0; i < ret; i++)
-			if (msgs[i].flags & I2C_M_RD)
-				trace_i2c_reply(adap, &msgs[i], i);
-		trace_i2c_result(adap, i, ret);
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL(__i2c_transfer);
-
-/**
- * i2c_transfer - execute a single or combined I2C message
- * @adap: Handle to I2C bus
- * @msgs: One or more messages to execute before STOP is issued to
- *	terminate the operation; each message begins with a START.
- * @num: Number of messages to be executed.
- *
- * Returns negative errno, else the number of messages executed.
- *
- * Note that there is no requirement that each message be sent to
- * the same slave address, although that is the most common model.
- */
-int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
-{
-	int ret;
-
-	/* REVISIT the fault reporting model here is weak:
-	 *
-	 *  - When we get an error after receiving N bytes from a slave,
-	 *    there is no way to report "N".
-	 *
-	 *  - When we get a NAK after transmitting N bytes to a slave,
-	 *    there is no way to report "N" ... or to let the master
-	 *    continue executing the rest of this combined message, if
-	 *    that's the appropriate response.
-	 *
-	 *  - When for example "num" is two and we successfully complete
-	 *    the first message but get an error part way through the
-	 *    second, it's unclear whether that should be reported as
-	 *    one (discarding status on the second message) or errno
-	 *    (discarding status on the first one).
-	 */
-
-	if (adap->algo->master_xfer) {
-#ifdef DEBUG
-		for (ret = 0; ret < num; ret++) {
-			dev_dbg(&adap->dev,
-				"master_xfer[%d] %c, addr=0x%02x, len=%d%s\n",
-				ret, (msgs[ret].flags & I2C_M_RD) ? 'R' : 'W',
-				msgs[ret].addr, msgs[ret].len,
-				(msgs[ret].flags & I2C_M_RECV_LEN) ? "+" : "");
-		}
-#endif
-
-		if (in_atomic() || irqs_disabled()) {
-			ret = i2c_trylock_bus(adap, I2C_LOCK_SEGMENT);
-			if (!ret)
-				/* I2C activity is ongoing. */
-				return -EAGAIN;
-		} else {
-			i2c_lock_bus(adap, I2C_LOCK_SEGMENT);
-		}
-
-		ret = __i2c_transfer(adap, msgs, num);
-		i2c_unlock_bus(adap, I2C_LOCK_SEGMENT);
-
-		return ret;
-	} else {
-		dev_dbg(&adap->dev, "I2C level transfers not supported\n");
-		return -EOPNOTSUPP;
-	}
-}
-EXPORT_SYMBOL(i2c_transfer);
-
-/**
- * i2c_master_send - issue a single I2C message in master transmit mode
- * @client: Handle to slave device
- * @buf: Data that will be written to the slave
- * @count: How many bytes to write, must be less than 64k since msg.len is u16
- *
- * Returns negative errno, or else the number of bytes written.
- */
-int i2c_master_send(const struct i2c_client *client, const char *buf, int count)
-{
-	int ret;
-	struct i2c_adapter *adap = client->adapter;
-	struct i2c_msg msg;
-
-	msg.addr = client->addr;
-	msg.flags = client->flags & I2C_M_TEN;
-	msg.len = count;
-	msg.buf = (char *)buf;
-
-	ret = i2c_transfer(adap, &msg, 1);
-
-	/*
-	 * If everything went ok (i.e. 1 msg transmitted), return #bytes
-	 * transmitted, else error code.
-	 */
-	return (ret == 1) ? count : ret;
-}
-EXPORT_SYMBOL(i2c_master_send);
-
-/**
- * i2c_master_recv - issue a single I2C message in master receive mode
- * @client: Handle to slave device
- * @buf: Where to store data read from slave
- * @count: How many bytes to read, must be less than 64k since msg.len is u16
- *
- * Returns negative errno, or else the number of bytes read.
- */
-int i2c_master_recv(const struct i2c_client *client, char *buf, int count)
-{
-	struct i2c_adapter *adap = client->adapter;
-	struct i2c_msg msg;
-	int ret;
-
-	msg.addr = client->addr;
-	msg.flags = client->flags & I2C_M_TEN;
-	msg.flags |= I2C_M_RD;
-	msg.len = count;
-	msg.buf = buf;
-
-	ret = i2c_transfer(adap, &msg, 1);
-
-	/*
-	 * If everything went ok (i.e. 1 msg received), return #bytes received,
-	 * else error code.
-	 */
-	return (ret == 1) ? count : ret;
-}
-EXPORT_SYMBOL(i2c_master_recv);
-
-/* ----------------------------------------------------
- * the i2c address scanning function
- * Will not work for 10-bit addresses!
- * ----------------------------------------------------
- */
-
-/*
- * Legacy default probe function, mostly relevant for SMBus. The default
- * probe method is a quick write, but it is known to corrupt the 24RF08
- * EEPROMs due to a state machine bug, and could also irreversibly
- * write-protect some EEPROMs, so for address ranges 0x30-0x37 and 0x50-0x5f,
- * we use a short byte read instead. Also, some bus drivers don't implement
- * quick write, so we fallback to a byte read in that case too.
- * On x86, there is another special case for FSC hardware monitoring chips,
- * which want regular byte reads (address 0x73.) Fortunately, these are the
- * only known chips using this I2C address on PC hardware.
- * Returns 1 if probe succeeded, 0 if not.
- */
-static int i2c_default_probe(struct i2c_adapter *adap, unsigned short addr)
-{
-	int err;
-	union i2c_smbus_data dummy;
-
-#ifdef CONFIG_X86
-	if (addr == 0x73 && (adap->class & I2C_CLASS_HWMON)
-	 && i2c_check_functionality(adap, I2C_FUNC_SMBUS_READ_BYTE_DATA))
-		err = i2c_smbus_xfer(adap, addr, 0, I2C_SMBUS_READ, 0,
-				     I2C_SMBUS_BYTE_DATA, &dummy);
-	else
-#endif
-	if (!((addr & ~0x07) == 0x30 || (addr & ~0x0f) == 0x50)
-	 && i2c_check_functionality(adap, I2C_FUNC_SMBUS_QUICK))
-		err = i2c_smbus_xfer(adap, addr, 0, I2C_SMBUS_WRITE, 0,
-				     I2C_SMBUS_QUICK, NULL);
-	else if (i2c_check_functionality(adap, I2C_FUNC_SMBUS_READ_BYTE))
-		err = i2c_smbus_xfer(adap, addr, 0, I2C_SMBUS_READ, 0,
-				     I2C_SMBUS_BYTE, &dummy);
-	else {
-		dev_warn(&adap->dev, "No suitable probing method supported for address 0x%02X\n",
-			 addr);
-		err = -EOPNOTSUPP;
-	}
-
-	return err >= 0;
-}
-
-static int i2c_detect_address(struct i2c_client *temp_client,
-			      struct i2c_driver *driver)
-{
-	struct i2c_board_info info;
-	struct i2c_adapter *adapter = temp_client->adapter;
-	int addr = temp_client->addr;
-	int err;
-
-	/* Make sure the address is valid */
-	err = i2c_check_7bit_addr_validity_strict(addr);
-	if (err) {
-		dev_warn(&adapter->dev, "Invalid probe address 0x%02x\n",
-			 addr);
-		return err;
-	}
-
-	/* Skip if already in use (7 bit, no need to encode flags) */
-	if (i2c_check_addr_busy(adapter, addr))
-		return 0;
-
-	/* Make sure there is something at this address */
-	if (!i2c_default_probe(adapter, addr))
-		return 0;
-
-	/* Finally call the custom detection function */
-	memset(&info, 0, sizeof(struct i2c_board_info));
-	info.addr = addr;
-	err = driver->detect(temp_client, &info);
-	if (err) {
-		/* -ENODEV is returned if the detection fails. We catch it
-		   here as this isn't an error. */
-		return err == -ENODEV ? 0 : err;
-	}
-
-	/* Consistency check */
-	if (info.type[0] == '\0') {
-		dev_err(&adapter->dev,
-			"%s detection function provided no name for 0x%x\n",
-			driver->driver.name, addr);
-	} else {
-		struct i2c_client *client;
-
-		/* Detection succeeded, instantiate the device */
-		if (adapter->class & I2C_CLASS_DEPRECATED)
-			dev_warn(&adapter->dev,
-				"This adapter will soon drop class based instantiation of devices. "
-				"Please make sure client 0x%02x gets instantiated by other means. "
-				"Check 'Documentation/i2c/instantiating-devices' for details.\n",
-				info.addr);
-
-		dev_dbg(&adapter->dev, "Creating %s at 0x%02x\n",
-			info.type, info.addr);
-		client = i2c_new_device(adapter, &info);
-		if (client)
-			list_add_tail(&client->detected, &driver->clients);
-		else
-			dev_err(&adapter->dev, "Failed creating %s at 0x%02x\n",
-				info.type, info.addr);
-	}
-	return 0;
-}
-
-static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver)
-{
-	const unsigned short *address_list;
-	struct i2c_client *temp_client;
-	int i, err = 0;
-	int adap_id = i2c_adapter_id(adapter);
-
-	address_list = driver->address_list;
-	if (!driver->detect || !address_list)
-		return 0;
-
-	/* Warn that the adapter lost class based instantiation */
-	if (adapter->class == I2C_CLASS_DEPRECATED) {
-		dev_dbg(&adapter->dev,
-			"This adapter dropped support for I2C classes and won't auto-detect %s devices anymore. "
-			"If you need it, check 'Documentation/i2c/instantiating-devices' for alternatives.\n",
-			driver->driver.name);
-		return 0;
-	}
-
-	/* Stop here if the classes do not match */
-	if (!(adapter->class & driver->class))
-		return 0;
-
-	/* Set up a temporary client to help detect callback */
-	temp_client = kzalloc(sizeof(struct i2c_client), GFP_KERNEL);
-	if (!temp_client)
-		return -ENOMEM;
-	temp_client->adapter = adapter;
-
-	for (i = 0; address_list[i] != I2C_CLIENT_END; i += 1) {
-		dev_dbg(&adapter->dev,
-			"found normal entry for adapter %d, addr 0x%02x\n",
-			adap_id, address_list[i]);
-		temp_client->addr = address_list[i];
-		err = i2c_detect_address(temp_client, driver);
-		if (unlikely(err))
-			break;
-	}
-
-	kfree(temp_client);
-	return err;
-}
-
-int i2c_probe_func_quick_read(struct i2c_adapter *adap, unsigned short addr)
-{
-	return i2c_smbus_xfer(adap, addr, 0, I2C_SMBUS_READ, 0,
-			      I2C_SMBUS_QUICK, NULL) >= 0;
-}
-EXPORT_SYMBOL_GPL(i2c_probe_func_quick_read);
-
-struct i2c_client *
-i2c_new_probed_device(struct i2c_adapter *adap,
-		      struct i2c_board_info *info,
-		      unsigned short const *addr_list,
-		      int (*probe)(struct i2c_adapter *, unsigned short addr))
-{
-	int i;
-
-	if (!probe)
-		probe = i2c_default_probe;
-
-	for (i = 0; addr_list[i] != I2C_CLIENT_END; i++) {
-		/* Check address validity */
-		if (i2c_check_7bit_addr_validity_strict(addr_list[i]) < 0) {
-			dev_warn(&adap->dev, "Invalid 7-bit address 0x%02x\n",
-				 addr_list[i]);
-			continue;
-		}
-
-		/* Check address availability (7 bit, no need to encode flags) */
-		if (i2c_check_addr_busy(adap, addr_list[i])) {
-			dev_dbg(&adap->dev,
-				"Address 0x%02x already in use, not probing\n",
-				addr_list[i]);
-			continue;
-		}
-
-		/* Test address responsiveness */
-		if (probe(adap, addr_list[i]))
-			break;
-	}
-
-	if (addr_list[i] == I2C_CLIENT_END) {
-		dev_dbg(&adap->dev, "Probing failed, no device found\n");
-		return NULL;
-	}
-
-	info->addr = addr_list[i];
-	return i2c_new_device(adap, info);
-}
-EXPORT_SYMBOL_GPL(i2c_new_probed_device);
-
-struct i2c_adapter *i2c_get_adapter(int nr)
-{
-	struct i2c_adapter *adapter;
-
-	mutex_lock(&core_lock);
-	adapter = idr_find(&i2c_adapter_idr, nr);
-	if (!adapter)
-		goto exit;
-
-	if (try_module_get(adapter->owner))
-		get_device(&adapter->dev);
-	else
-		adapter = NULL;
-
- exit:
-	mutex_unlock(&core_lock);
-	return adapter;
-}
-EXPORT_SYMBOL(i2c_get_adapter);
-
-void i2c_put_adapter(struct i2c_adapter *adap)
-{
-	if (!adap)
-		return;
-
-	put_device(&adap->dev);
-	module_put(adap->owner);
-}
-EXPORT_SYMBOL(i2c_put_adapter);
-
-/* The SMBus parts */
-
-#define POLY    (0x1070U << 3)
-static u8 crc8(u16 data)
-{
-	int i;
-
-	for (i = 0; i < 8; i++) {
-		if (data & 0x8000)
-			data = data ^ POLY;
-		data = data << 1;
-	}
-	return (u8)(data >> 8);
-}
-
-/* Incremental CRC8 over count bytes in the array pointed to by p */
-static u8 i2c_smbus_pec(u8 crc, u8 *p, size_t count)
-{
-	int i;
-
-	for (i = 0; i < count; i++)
-		crc = crc8((crc ^ p[i]) << 8);
-	return crc;
-}
-
-/* Assume a 7-bit address, which is reasonable for SMBus */
-static u8 i2c_smbus_msg_pec(u8 pec, struct i2c_msg *msg)
-{
-	/* The address will be sent first */
-	u8 addr = i2c_8bit_addr_from_msg(msg);
-	pec = i2c_smbus_pec(pec, &addr, 1);
-
-	/* The data buffer follows */
-	return i2c_smbus_pec(pec, msg->buf, msg->len);
-}
-
-/* Used for write only transactions */
-static inline void i2c_smbus_add_pec(struct i2c_msg *msg)
-{
-	msg->buf[msg->len] = i2c_smbus_msg_pec(0, msg);
-	msg->len++;
-}
-
-/* Return <0 on CRC error
-   If there was a write before this read (most cases) we need to take the
-   partial CRC from the write part into account.
-   Note that this function does modify the message (we need to decrease the
-   message length to hide the CRC byte from the caller). */
-static int i2c_smbus_check_pec(u8 cpec, struct i2c_msg *msg)
-{
-	u8 rpec = msg->buf[--msg->len];
-	cpec = i2c_smbus_msg_pec(cpec, msg);
-
-	if (rpec != cpec) {
-		pr_debug("Bad PEC 0x%02x vs. 0x%02x\n",
-			rpec, cpec);
-		return -EBADMSG;
-	}
-	return 0;
-}
-
-/**
- * i2c_smbus_read_byte - SMBus "receive byte" protocol
- * @client: Handle to slave device
- *
- * This executes the SMBus "receive byte" protocol, returning negative errno
- * else the byte received from the device.
- */
-s32 i2c_smbus_read_byte(const struct i2c_client *client)
-{
-	union i2c_smbus_data data;
-	int status;
-
-	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-				I2C_SMBUS_READ, 0,
-				I2C_SMBUS_BYTE, &data);
-	return (status < 0) ? status : data.byte;
-}
-EXPORT_SYMBOL(i2c_smbus_read_byte);
-
-/**
- * i2c_smbus_write_byte - SMBus "send byte" protocol
- * @client: Handle to slave device
- * @value: Byte to be sent
- *
- * This executes the SMBus "send byte" protocol, returning negative errno
- * else zero on success.
- */
-s32 i2c_smbus_write_byte(const struct i2c_client *client, u8 value)
-{
-	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-	                      I2C_SMBUS_WRITE, value, I2C_SMBUS_BYTE, NULL);
-}
-EXPORT_SYMBOL(i2c_smbus_write_byte);
-
-/**
- * i2c_smbus_read_byte_data - SMBus "read byte" protocol
- * @client: Handle to slave device
- * @command: Byte interpreted by slave
- *
- * This executes the SMBus "read byte" protocol, returning negative errno
- * else a data byte received from the device.
- */
-s32 i2c_smbus_read_byte_data(const struct i2c_client *client, u8 command)
-{
-	union i2c_smbus_data data;
-	int status;
-
-	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-				I2C_SMBUS_READ, command,
-				I2C_SMBUS_BYTE_DATA, &data);
-	return (status < 0) ? status : data.byte;
-}
-EXPORT_SYMBOL(i2c_smbus_read_byte_data);
-
-/**
- * i2c_smbus_write_byte_data - SMBus "write byte" protocol
- * @client: Handle to slave device
- * @command: Byte interpreted by slave
- * @value: Byte being written
- *
- * This executes the SMBus "write byte" protocol, returning negative errno
- * else zero on success.
- */
-s32 i2c_smbus_write_byte_data(const struct i2c_client *client, u8 command,
-			      u8 value)
-{
-	union i2c_smbus_data data;
-	data.byte = value;
-	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-			      I2C_SMBUS_WRITE, command,
-			      I2C_SMBUS_BYTE_DATA, &data);
-}
-EXPORT_SYMBOL(i2c_smbus_write_byte_data);
-
-/**
- * i2c_smbus_read_word_data - SMBus "read word" protocol
- * @client: Handle to slave device
- * @command: Byte interpreted by slave
- *
- * This executes the SMBus "read word" protocol, returning negative errno
- * else a 16-bit unsigned "word" received from the device.
- */
-s32 i2c_smbus_read_word_data(const struct i2c_client *client, u8 command)
-{
-	union i2c_smbus_data data;
-	int status;
-
-	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-				I2C_SMBUS_READ, command,
-				I2C_SMBUS_WORD_DATA, &data);
-	return (status < 0) ? status : data.word;
-}
-EXPORT_SYMBOL(i2c_smbus_read_word_data);
-
-/**
- * i2c_smbus_write_word_data - SMBus "write word" protocol
- * @client: Handle to slave device
- * @command: Byte interpreted by slave
- * @value: 16-bit "word" being written
- *
- * This executes the SMBus "write word" protocol, returning negative errno
- * else zero on success.
- */
-s32 i2c_smbus_write_word_data(const struct i2c_client *client, u8 command,
-			      u16 value)
-{
-	union i2c_smbus_data data;
-	data.word = value;
-	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-			      I2C_SMBUS_WRITE, command,
-			      I2C_SMBUS_WORD_DATA, &data);
-}
-EXPORT_SYMBOL(i2c_smbus_write_word_data);
-
-/**
- * i2c_smbus_read_block_data - SMBus "block read" protocol
- * @client: Handle to slave device
- * @command: Byte interpreted by slave
- * @values: Byte array into which data will be read; big enough to hold
- *	the data returned by the slave.  SMBus allows at most 32 bytes.
- *
- * This executes the SMBus "block read" protocol, returning negative errno
- * else the number of data bytes in the slave's response.
- *
- * Note that using this function requires that the client's adapter support
- * the I2C_FUNC_SMBUS_READ_BLOCK_DATA functionality.  Not all adapter drivers
- * support this; its emulation through I2C messaging relies on a specific
- * mechanism (I2C_M_RECV_LEN) which may not be implemented.
- */
-s32 i2c_smbus_read_block_data(const struct i2c_client *client, u8 command,
-			      u8 *values)
-{
-	union i2c_smbus_data data;
-	int status;
-
-	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-				I2C_SMBUS_READ, command,
-				I2C_SMBUS_BLOCK_DATA, &data);
-	if (status)
-		return status;
-
-	memcpy(values, &data.block[1], data.block[0]);
-	return data.block[0];
-}
-EXPORT_SYMBOL(i2c_smbus_read_block_data);
-
-/**
- * i2c_smbus_write_block_data - SMBus "block write" protocol
- * @client: Handle to slave device
- * @command: Byte interpreted by slave
- * @length: Size of data block; SMBus allows at most 32 bytes
- * @values: Byte array which will be written.
- *
- * This executes the SMBus "block write" protocol, returning negative errno
- * else zero on success.
- */
-s32 i2c_smbus_write_block_data(const struct i2c_client *client, u8 command,
-			       u8 length, const u8 *values)
-{
-	union i2c_smbus_data data;
-
-	if (length > I2C_SMBUS_BLOCK_MAX)
-		length = I2C_SMBUS_BLOCK_MAX;
-	data.block[0] = length;
-	memcpy(&data.block[1], values, length);
-	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-			      I2C_SMBUS_WRITE, command,
-			      I2C_SMBUS_BLOCK_DATA, &data);
-}
-EXPORT_SYMBOL(i2c_smbus_write_block_data);
-
-/* Returns the number of read bytes */
-s32 i2c_smbus_read_i2c_block_data(const struct i2c_client *client, u8 command,
-				  u8 length, u8 *values)
-{
-	union i2c_smbus_data data;
-	int status;
-
-	if (length > I2C_SMBUS_BLOCK_MAX)
-		length = I2C_SMBUS_BLOCK_MAX;
-	data.block[0] = length;
-	status = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-				I2C_SMBUS_READ, command,
-				I2C_SMBUS_I2C_BLOCK_DATA, &data);
-	if (status < 0)
-		return status;
-
-	memcpy(values, &data.block[1], data.block[0]);
-	return data.block[0];
-}
-EXPORT_SYMBOL(i2c_smbus_read_i2c_block_data);
-
-s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client, u8 command,
-				   u8 length, const u8 *values)
-{
-	union i2c_smbus_data data;
-
-	if (length > I2C_SMBUS_BLOCK_MAX)
-		length = I2C_SMBUS_BLOCK_MAX;
-	data.block[0] = length;
-	memcpy(data.block + 1, values, length);
-	return i2c_smbus_xfer(client->adapter, client->addr, client->flags,
-			      I2C_SMBUS_WRITE, command,
-			      I2C_SMBUS_I2C_BLOCK_DATA, &data);
-}
-EXPORT_SYMBOL(i2c_smbus_write_i2c_block_data);
-
-/* Simulate a SMBus command using the i2c protocol
-   No checking of parameters is done!  */
-static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr,
-				   unsigned short flags,
-				   char read_write, u8 command, int size,
-				   union i2c_smbus_data *data)
-{
-	/* So we need to generate a series of msgs. In the case of writing, we
-	  need to use only one message; when reading, we need two. We initialize
-	  most things with sane defaults, to keep the code below somewhat
-	  simpler. */
-	unsigned char msgbuf0[I2C_SMBUS_BLOCK_MAX+3];
-	unsigned char msgbuf1[I2C_SMBUS_BLOCK_MAX+2];
-	int num = read_write == I2C_SMBUS_READ ? 2 : 1;
-	int i;
-	u8 partial_pec = 0;
-	int status;
-	struct i2c_msg msg[2] = {
-		{
-			.addr = addr,
-			.flags = flags,
-			.len = 1,
-			.buf = msgbuf0,
-		}, {
-			.addr = addr,
-			.flags = flags | I2C_M_RD,
-			.len = 0,
-			.buf = msgbuf1,
-		},
-	};
-
-	msgbuf0[0] = command;
-	switch (size) {
-	case I2C_SMBUS_QUICK:
-		msg[0].len = 0;
-		/* Special case: The read/write field is used as data */
-		msg[0].flags = flags | (read_write == I2C_SMBUS_READ ?
-					I2C_M_RD : 0);
-		num = 1;
-		break;
-	case I2C_SMBUS_BYTE:
-		if (read_write == I2C_SMBUS_READ) {
-			/* Special case: only a read! */
-			msg[0].flags = I2C_M_RD | flags;
-			num = 1;
-		}
-		break;
-	case I2C_SMBUS_BYTE_DATA:
-		if (read_write == I2C_SMBUS_READ)
-			msg[1].len = 1;
-		else {
-			msg[0].len = 2;
-			msgbuf0[1] = data->byte;
-		}
-		break;
-	case I2C_SMBUS_WORD_DATA:
-		if (read_write == I2C_SMBUS_READ)
-			msg[1].len = 2;
-		else {
-			msg[0].len = 3;
-			msgbuf0[1] = data->word & 0xff;
-			msgbuf0[2] = data->word >> 8;
-		}
-		break;
-	case I2C_SMBUS_PROC_CALL:
-		num = 2; /* Special case */
-		read_write = I2C_SMBUS_READ;
-		msg[0].len = 3;
-		msg[1].len = 2;
-		msgbuf0[1] = data->word & 0xff;
-		msgbuf0[2] = data->word >> 8;
-		break;
-	case I2C_SMBUS_BLOCK_DATA:
-		if (read_write == I2C_SMBUS_READ) {
-			msg[1].flags |= I2C_M_RECV_LEN;
-			msg[1].len = 1; /* block length will be added by
-					   the underlying bus driver */
-		} else {
-			msg[0].len = data->block[0] + 2;
-			if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 2) {
-				dev_err(&adapter->dev,
-					"Invalid block write size %d\n",
-					data->block[0]);
-				return -EINVAL;
-			}
-			for (i = 1; i < msg[0].len; i++)
-				msgbuf0[i] = data->block[i-1];
-		}
-		break;
-	case I2C_SMBUS_BLOCK_PROC_CALL:
-		num = 2; /* Another special case */
-		read_write = I2C_SMBUS_READ;
-		if (data->block[0] > I2C_SMBUS_BLOCK_MAX) {
-			dev_err(&adapter->dev,
-				"Invalid block write size %d\n",
-				data->block[0]);
-			return -EINVAL;
-		}
-		msg[0].len = data->block[0] + 2;
-		for (i = 1; i < msg[0].len; i++)
-			msgbuf0[i] = data->block[i-1];
-		msg[1].flags |= I2C_M_RECV_LEN;
-		msg[1].len = 1; /* block length will be added by
-				   the underlying bus driver */
-		break;
-	case I2C_SMBUS_I2C_BLOCK_DATA:
-		if (read_write == I2C_SMBUS_READ) {
-			msg[1].len = data->block[0];
-		} else {
-			msg[0].len = data->block[0] + 1;
-			if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 1) {
-				dev_err(&adapter->dev,
-					"Invalid block write size %d\n",
-					data->block[0]);
-				return -EINVAL;
-			}
-			for (i = 1; i <= data->block[0]; i++)
-				msgbuf0[i] = data->block[i];
-		}
-		break;
-	default:
-		dev_err(&adapter->dev, "Unsupported transaction %d\n", size);
-		return -EOPNOTSUPP;
-	}
-
-	i = ((flags & I2C_CLIENT_PEC) && size != I2C_SMBUS_QUICK
-				      && size != I2C_SMBUS_I2C_BLOCK_DATA);
-	if (i) {
-		/* Compute PEC if first message is a write */
-		if (!(msg[0].flags & I2C_M_RD)) {
-			if (num == 1) /* Write only */
-				i2c_smbus_add_pec(&msg[0]);
-			else /* Write followed by read */
-				partial_pec = i2c_smbus_msg_pec(0, &msg[0]);
-		}
-		/* Ask for PEC if last message is a read */
-		if (msg[num-1].flags & I2C_M_RD)
-			msg[num-1].len++;
-	}
-
-	status = i2c_transfer(adapter, msg, num);
-	if (status < 0)
-		return status;
-
-	/* Check PEC if last message is a read */
-	if (i && (msg[num-1].flags & I2C_M_RD)) {
-		status = i2c_smbus_check_pec(partial_pec, &msg[num-1]);
-		if (status < 0)
-			return status;
-	}
-
-	if (read_write == I2C_SMBUS_READ)
-		switch (size) {
-		case I2C_SMBUS_BYTE:
-			data->byte = msgbuf0[0];
-			break;
-		case I2C_SMBUS_BYTE_DATA:
-			data->byte = msgbuf1[0];
-			break;
-		case I2C_SMBUS_WORD_DATA:
-		case I2C_SMBUS_PROC_CALL:
-			data->word = msgbuf1[0] | (msgbuf1[1] << 8);
-			break;
-		case I2C_SMBUS_I2C_BLOCK_DATA:
-			for (i = 0; i < data->block[0]; i++)
-				data->block[i+1] = msgbuf1[i];
-			break;
-		case I2C_SMBUS_BLOCK_DATA:
-		case I2C_SMBUS_BLOCK_PROC_CALL:
-			for (i = 0; i < msgbuf1[0] + 1; i++)
-				data->block[i] = msgbuf1[i];
-			break;
-		}
-	return 0;
-}
-
-/**
- * i2c_smbus_xfer - execute SMBus protocol operations
- * @adapter: Handle to I2C bus
- * @addr: Address of SMBus slave on that bus
- * @flags: I2C_CLIENT_* flags (usually zero or I2C_CLIENT_PEC)
- * @read_write: I2C_SMBUS_READ or I2C_SMBUS_WRITE
- * @command: Byte interpreted by slave, for protocols which use such bytes
- * @protocol: SMBus protocol operation to execute, such as I2C_SMBUS_PROC_CALL
- * @data: Data to be read or written
- *
- * This executes an SMBus protocol operation, and returns a negative
- * errno code else zero on success.
- */
-s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
-		   char read_write, u8 command, int protocol,
-		   union i2c_smbus_data *data)
-{
-	unsigned long orig_jiffies;
-	int try;
-	s32 res;
-
-	/* If enabled, the following two tracepoints are conditional on
-	 * read_write and protocol.
-	 */
-	trace_smbus_write(adapter, addr, flags, read_write,
-			  command, protocol, data);
-	trace_smbus_read(adapter, addr, flags, read_write,
-			 command, protocol);
-
-	flags &= I2C_M_TEN | I2C_CLIENT_PEC | I2C_CLIENT_SCCB;
-
-	if (adapter->algo->smbus_xfer) {
-		i2c_lock_bus(adapter, I2C_LOCK_SEGMENT);
-
-		/* Retry automatically on arbitration loss */
-		orig_jiffies = jiffies;
-		for (res = 0, try = 0; try <= adapter->retries; try++) {
-			res = adapter->algo->smbus_xfer(adapter, addr, flags,
-							read_write, command,
-							protocol, data);
-			if (res != -EAGAIN)
-				break;
-			if (time_after(jiffies,
-				       orig_jiffies + adapter->timeout))
-				break;
-		}
-		i2c_unlock_bus(adapter, I2C_LOCK_SEGMENT);
-
-		if (res != -EOPNOTSUPP || !adapter->algo->master_xfer)
-			goto trace;
-		/*
-		 * Fall back to i2c_smbus_xfer_emulated if the adapter doesn't
-		 * implement native support for the SMBus operation.
-		 */
-	}
-
-	res = i2c_smbus_xfer_emulated(adapter, addr, flags, read_write,
-				      command, protocol, data);
-
-trace:
-	/* If enabled, the reply tracepoint is conditional on read_write. */
-	trace_smbus_reply(adapter, addr, flags, read_write,
-			  command, protocol, data);
-	trace_smbus_result(adapter, addr, flags, read_write,
-			   command, protocol, res);
-
-	return res;
-}
-EXPORT_SYMBOL(i2c_smbus_xfer);
-
-/**
- * i2c_smbus_read_i2c_block_data_or_emulated - read block or emulate
- * @client: Handle to slave device
- * @command: Byte interpreted by slave
- * @length: Size of data block; SMBus allows at most I2C_SMBUS_BLOCK_MAX bytes
- * @values: Byte array into which data will be read; big enough to hold
- *	the data returned by the slave.  SMBus allows at most
- *	I2C_SMBUS_BLOCK_MAX bytes.
- *
- * This executes the SMBus "block read" protocol if supported by the adapter.
- * If block read is not supported, it emulates it using either word or byte
- * read protocols depending on availability.
- *
- * The addresses of the I2C slave device that are accessed with this function
- * must be mapped to a linear region, so that a block read will have the same
- * effect as a byte read. Before using this function you must double-check
- * if the I2C slave does support exchanging a block transfer with a byte
- * transfer.
- */
-s32 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client,
-					      u8 command, u8 length, u8 *values)
-{
-	u8 i = 0;
-	int status;
-
-	if (length > I2C_SMBUS_BLOCK_MAX)
-		length = I2C_SMBUS_BLOCK_MAX;
-
-	if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK))
-		return i2c_smbus_read_i2c_block_data(client, command, length, values);
-
-	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA))
-		return -EOPNOTSUPP;
-
-	if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_WORD_DATA)) {
-		while ((i + 2) <= length) {
-			status = i2c_smbus_read_word_data(client, command + i);
-			if (status < 0)
-				return status;
-			values[i] = status & 0xff;
-			values[i + 1] = status >> 8;
-			i += 2;
-		}
-	}
-
-	while (i < length) {
-		status = i2c_smbus_read_byte_data(client, command + i);
-		if (status < 0)
-			return status;
-		values[i] = status;
-		i++;
-	}
-
-	return i;
-}
-EXPORT_SYMBOL(i2c_smbus_read_i2c_block_data_or_emulated);
-
-#if IS_ENABLED(CONFIG_I2C_SLAVE)
-int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb)
-{
-	int ret;
-
-	if (!client || !slave_cb) {
-		WARN(1, "insufficient data\n");
-		return -EINVAL;
-	}
-
-	if (!(client->flags & I2C_CLIENT_SLAVE))
-		dev_warn(&client->dev, "%s: client slave flag not set. You might see address collisions\n",
-			 __func__);
-
-	if (!(client->flags & I2C_CLIENT_TEN)) {
-		/* Enforce stricter address checking */
-		ret = i2c_check_7bit_addr_validity_strict(client->addr);
-		if (ret) {
-			dev_err(&client->dev, "%s: invalid address\n", __func__);
-			return ret;
-		}
-	}
-
-	if (!client->adapter->algo->reg_slave) {
-		dev_err(&client->dev, "%s: not supported by adapter\n", __func__);
-		return -EOPNOTSUPP;
-	}
-
-	client->slave_cb = slave_cb;
-
-	i2c_lock_adapter(client->adapter);
-	ret = client->adapter->algo->reg_slave(client);
-	i2c_unlock_adapter(client->adapter);
-
-	if (ret) {
-		client->slave_cb = NULL;
-		dev_err(&client->dev, "%s: adapter returned error %d\n", __func__, ret);
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(i2c_slave_register);
-
-int i2c_slave_unregister(struct i2c_client *client)
-{
-	int ret;
-
-	if (!client->adapter->algo->unreg_slave) {
-		dev_err(&client->dev, "%s: not supported by adapter\n", __func__);
-		return -EOPNOTSUPP;
-	}
-
-	i2c_lock_adapter(client->adapter);
-	ret = client->adapter->algo->unreg_slave(client);
-	i2c_unlock_adapter(client->adapter);
-
-	if (ret == 0)
-		client->slave_cb = NULL;
-	else
-		dev_err(&client->dev, "%s: adapter returned error %d\n", __func__, ret);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(i2c_slave_unregister);
-
-/**
- * i2c_detect_slave_mode - detect operation mode
- * @dev: The device owning the bus
- *
- * This checks the device nodes for an I2C slave by checking the address
- * used in the reg property. If the address match the I2C_OWN_SLAVE_ADDRESS
- * flag this means the device is configured to act as a I2C slave and it will
- * be listening at that address.
- *
- * Returns true if an I2C own slave address is detected, otherwise returns
- * false.
- */
-bool i2c_detect_slave_mode(struct device *dev)
-{
-	if (IS_BUILTIN(CONFIG_OF) && dev->of_node) {
-		struct device_node *child;
-		u32 reg;
-
-		for_each_child_of_node(dev->of_node, child) {
-			of_property_read_u32(child, "reg", &reg);
-			if (reg & I2C_OWN_SLAVE_ADDRESS) {
-				of_node_put(child);
-				return true;
-			}
-		}
-	} else if (IS_BUILTIN(CONFIG_ACPI) && ACPI_HANDLE(dev)) {
-		dev_dbg(dev, "ACPI slave is not supported yet\n");
-	}
-	return false;
-}
-EXPORT_SYMBOL_GPL(i2c_detect_slave_mode);
-
-#endif
-
-MODULE_AUTHOR("Simon G. Vogl <simon@tk.uni-linz.ac.at>");
-MODULE_DESCRIPTION("I2C-Bus main module");
-MODULE_LICENSE("GPL");

diff --git a/drivers/i2c/i2c-core.h b/drivers/i2c/i2c-core.h
index 17700bf..3b63f5e 100644
--- a/drivers/i2c/i2c-core.h
+++ b/drivers/i2c/i2c-core.h

@@ -27,3 +27,27 @@ extern struct rw_semaphore	__i2c_board_lock;
 extern struct list_head	__i2c_board_list;
 extern int		__i2c_first_dynamic_bus_num;
 
+int i2c_check_addr_validity(unsigned addr, unsigned short flags);
+int i2c_check_7bit_addr_validity_strict(unsigned short addr);
+
+#ifdef CONFIG_ACPI
+void i2c_acpi_register_devices(struct i2c_adapter *adap);
+#else /* CONFIG_ACPI */
+static inline void i2c_acpi_register_devices(struct i2c_adapter *adap) { }
+#endif /* CONFIG_ACPI */
+extern struct notifier_block i2c_acpi_notifier;
+
+#ifdef CONFIG_ACPI_I2C_OPREGION
+int i2c_acpi_install_space_handler(struct i2c_adapter *adapter);
+void i2c_acpi_remove_space_handler(struct i2c_adapter *adapter);
+#else /* CONFIG_ACPI_I2C_OPREGION */
+static inline int i2c_acpi_install_space_handler(struct i2c_adapter *adapter) { return 0; }
+static inline void i2c_acpi_remove_space_handler(struct i2c_adapter *adapter) { }
+#endif /* CONFIG_ACPI_I2C_OPREGION */
+
+#ifdef CONFIG_OF
+void of_i2c_register_devices(struct i2c_adapter *adap);
+#else
+static inline void of_i2c_register_devices(struct i2c_adapter *adap) { }
+#endif
+extern struct notifier_block i2c_of_notifier;

diff --git a/drivers/i2c/i2c-stub.c b/drivers/i2c/i2c-stub.c
index 06af583..4a9ad91 100644
--- a/drivers/i2c/i2c-stub.c
+++ b/drivers/i2c/i2c-stub.c

@@ -16,6 +16,7 @@
 */
 
 #define DEBUG 1
+#define pr_fmt(fmt) "i2c-stub: " fmt
 
 #include <linux/errno.h>
 #include <linux/i2c.h>
@@ -342,7 +343,7 @@ static int __init i2c_stub_allocate_banks(int i)
 	if (!chip->bank_words)
 		return -ENOMEM;
 
-	pr_debug("i2c-stub: Allocated %u banks of %u words each (registers 0x%02x to 0x%02x)\n",
+	pr_debug("Allocated %u banks of %u words each (registers 0x%02x to 0x%02x)\n",
 		 chip->bank_mask, chip->bank_size, chip->bank_start,
 		 chip->bank_end);
 
@@ -363,28 +364,27 @@ static int __init i2c_stub_init(void)
 	int i, ret;
 
 	if (!chip_addr[0]) {
-		pr_err("i2c-stub: Please specify a chip address\n");
+		pr_err("Please specify a chip address\n");
 		return -ENODEV;
 	}
 
 	for (i = 0; i < MAX_CHIPS && chip_addr[i]; i++) {
 		if (chip_addr[i] < 0x03 || chip_addr[i] > 0x77) {
-			pr_err("i2c-stub: Invalid chip address 0x%02x\n",
+			pr_err("Invalid chip address 0x%02x\n",
 			       chip_addr[i]);
 			return -EINVAL;
 		}
 
-		pr_info("i2c-stub: Virtual chip at 0x%02x\n", chip_addr[i]);
+		pr_info("Virtual chip at 0x%02x\n", chip_addr[i]);
 	}
 
 	/* Allocate memory for all chips at once */
 	stub_chips_nr = i;
 	stub_chips = kcalloc(stub_chips_nr, sizeof(struct stub_chip),
 			     GFP_KERNEL);
-	if (!stub_chips) {
-		pr_err("i2c-stub: Out of memory\n");
+	if (!stub_chips)
 		return -ENOMEM;
-	}
+
 	for (i = 0; i < stub_chips_nr; i++) {
 		INIT_LIST_HEAD(&stub_chips[i].smbus_blocks);
 

diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 2303976..be944d5 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c

@@ -995,7 +995,9 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 	free_rd_atomic_resource(qp, res);
 	rxe_advance_resp_resource(qp);
 
-	memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb));
+	memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(ack_pkt));
+	memset((unsigned char *)SKB_TO_PKT(skb) + sizeof(ack_pkt), 0,
+	       sizeof(skb->cb) - sizeof(ack_pkt));
 
 	res->type = RXE_ATOMIC_MASK;
 	res->atomic.skb = skb;

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index fcbed35..0e662656 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c

@@ -1452,7 +1452,7 @@ static void
 isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct isert_conn *isert_conn = wc->qp->qp_context;
-	struct ib_device *ib_dev = isert_conn->cm_id->device;
+	struct ib_device *ib_dev = isert_conn->device->ib_device;
 
 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		isert_print_wc(wc, "login recv");

diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 1ced073..402275b 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c

@@ -1157,8 +1157,8 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
 	}
 	spin_unlock_irqrestore(&ioctx->spinlock, flags);
 
-	pr_debug("Aborting cmd with state %d and tag %lld\n", state,
-		 ioctx->cmd.tag);
+	pr_debug("Aborting cmd with state %d -> %d and tag %lld\n", state,
+		 ioctx->state, ioctx->cmd.tag);
 
 	switch (state) {
 	case SRPT_STATE_NEW:

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 6ee3a25..f73ff28 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig

@@ -23,7 +23,7 @@
 config IOMMU_IO_PGTABLE_LPAE
 	bool "ARMv7/v8 Long Descriptor Format"
 	select IOMMU_IO_PGTABLE
-	depends on HAS_DMA && (ARM || ARM64 || COMPILE_TEST)
+	depends on HAS_DMA && (ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64))
 	help
 	  Enable support for the ARM long descriptor pagetable format.
 	  This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page
@@ -219,7 +219,7 @@
 
 config ROCKCHIP_IOMMU
 	bool "Rockchip IOMMU Support"
-	depends on ARM
+	depends on ARM || ARM64
 	depends on ARCH_ROCKCHIP || COMPILE_TEST
 	select IOMMU_API
 	select ARM_DMA_USE_IOMMU
@@ -274,7 +274,7 @@
 
 config IPMMU_VMSA
 	bool "Renesas VMSA-compatible IPMMU"
-	depends on ARM_LPAE
+	depends on ARM || IOMMU_DMA
 	depends on ARCH_RENESAS || COMPILE_TEST
 	select IOMMU_API
 	select IOMMU_IO_PGTABLE_LPAE

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index f16d0f2..688e775 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c

@@ -91,25 +91,6 @@ LIST_HEAD(ioapic_map);
 LIST_HEAD(hpet_map);
 LIST_HEAD(acpihid_map);
 
-#define FLUSH_QUEUE_SIZE 256
-
-struct flush_queue_entry {
-	unsigned long iova_pfn;
-	unsigned long pages;
-	struct dma_ops_domain *dma_dom;
-};
-
-struct flush_queue {
-	spinlock_t lock;
-	unsigned next;
-	struct flush_queue_entry *entries;
-};
-
-static DEFINE_PER_CPU(struct flush_queue, flush_queue);
-
-static atomic_t queue_timer_on;
-static struct timer_list queue_timer;
-
 /*
  * Domain for untranslated devices - only allocated
  * if iommu=pt passed on kernel cmd line.
@@ -140,6 +121,8 @@ struct iommu_dev_data {
 					     PPR completions */
 	u32 errata;			  /* Bitmap for errata to apply */
 	bool use_vapic;			  /* Enable device to use vapic mode */
+
+	struct ratelimit_state rs;	  /* Ratelimit IOPF messages */
 };
 
 /*
@@ -155,6 +138,20 @@ static void update_domain(struct protection_domain *domain);
 static int protection_domain_init(struct protection_domain *domain);
 static void detach_device(struct device *dev);
 
+#define FLUSH_QUEUE_SIZE 256
+
+struct flush_queue_entry {
+	unsigned long iova_pfn;
+	unsigned long pages;
+	u64 counter; /* Flush counter when this entry was added to the queue */
+};
+
+struct flush_queue {
+	struct flush_queue_entry *entries;
+	unsigned head, tail;
+	spinlock_t lock;
+};
+
 /*
  * Data container for a dma_ops specific protection domain
  */
@@ -164,6 +161,36 @@ struct dma_ops_domain {
 
 	/* IOVA RB-Tree */
 	struct iova_domain iovad;
+
+	struct flush_queue __percpu *flush_queue;
+
+	/*
+	 * We need two counters here to be race-free wrt. IOTLB flushing and
+	 * adding entries to the flush queue.
+	 *
+	 * The flush_start_cnt is incremented _before_ the IOTLB flush starts.
+	 * New entries added to the flush ring-buffer get their 'counter' value
+	 * from here. This way we can make sure that entries added to the queue
+	 * (or other per-cpu queues of the same domain) while the TLB is about
+	 * to be flushed are not considered to be flushed already.
+	 */
+	atomic64_t flush_start_cnt;
+
+	/*
+	 * The flush_finish_cnt is incremented when an IOTLB flush is complete.
+	 * This value is never larger than flush_start_cnt. The queue_add
+	 * function frees all IOVAs that have a counter value smaller than
+	 * flush_finish_cnt. This makes sure that we only free IOVAs that are
+	 * flushed out of the IOTLB of the domain.
+	 */
+	atomic64_t flush_finish_cnt;
+
+	/*
+	 * Timer to make sure we don't keep IOVAs around unflushed
+	 * for too long
+	 */
+	struct timer_list flush_timer;
+	atomic_t flush_timer_on;
 };
 
 static struct iova_domain reserved_iova_ranges;
@@ -255,6 +282,8 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
 	list_add_tail(&dev_data->dev_data_list, &dev_data_list);
 	spin_unlock_irqrestore(&dev_data_list_lock, flags);
 
+	ratelimit_default_init(&dev_data->rs);
+
 	return dev_data;
 }
 
@@ -553,6 +582,29 @@ static void dump_command(unsigned long phys_addr)
 		pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
 }
 
+static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
+					u64 address, int flags)
+{
+	struct iommu_dev_data *dev_data = NULL;
+	struct pci_dev *pdev;
+
+	pdev = pci_get_bus_and_slot(PCI_BUS_NUM(devid), devid & 0xff);
+	if (pdev)
+		dev_data = get_dev_data(&pdev->dev);
+
+	if (dev_data && __ratelimit(&dev_data->rs)) {
+		dev_err(&pdev->dev, "AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+			domain_id, address, flags);
+	} else if (printk_ratelimit()) {
+		pr_err("AMD-Vi: Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+			domain_id, address, flags);
+	}
+
+	if (pdev)
+		pci_dev_put(pdev);
+}
+
 static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 {
 	int type, devid, domid, flags;
@@ -577,7 +629,12 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 		goto retry;
 	}
 
-	printk(KERN_ERR "AMD-Vi: Event logged [");
+	if (type == EVENT_TYPE_IO_FAULT) {
+		amd_iommu_report_page_fault(devid, domid, address, flags);
+		return;
+	} else {
+		printk(KERN_ERR "AMD-Vi: Event logged [");
+	}
 
 	switch (type) {
 	case EVENT_TYPE_ILL_DEV:
@@ -587,12 +644,6 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 		       address, flags);
 		dump_dte_entry(devid);
 		break;
-	case EVENT_TYPE_IO_FAULT:
-		printk("IO_PAGE_FAULT device=%02x:%02x.%x "
-		       "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
-		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-		       domid, address, flags);
-		break;
 	case EVENT_TYPE_DEV_TAB_ERR:
 		printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
 		       "address=0x%016llx flags=0x%04x]\n",
@@ -850,19 +901,20 @@ static int wait_on_sem(volatile u64 *sem)
 }
 
 static void copy_cmd_to_buffer(struct amd_iommu *iommu,
-			       struct iommu_cmd *cmd,
-			       u32 tail)
+			       struct iommu_cmd *cmd)
 {
 	u8 *target;
 
-	target = iommu->cmd_buf + tail;
-	tail   = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
+	target = iommu->cmd_buf + iommu->cmd_buf_tail;
+
+	iommu->cmd_buf_tail += sizeof(*cmd);
+	iommu->cmd_buf_tail %= CMD_BUFFER_SIZE;
 
 	/* Copy command to buffer */
 	memcpy(target, cmd, sizeof(*cmd));
 
 	/* Tell the IOMMU about it */
-	writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+	writel(iommu->cmd_buf_tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 }
 
 static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
@@ -1020,33 +1072,34 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu,
 				      struct iommu_cmd *cmd,
 				      bool sync)
 {
-	u32 left, tail, head, next_tail;
+	unsigned int count = 0;
+	u32 left, next_tail;
 
+	next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
 again:
-
-	head      = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
-	tail      = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
-	next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
-	left      = (head - next_tail) % CMD_BUFFER_SIZE;
+	left      = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE;
 
 	if (left <= 0x20) {
-		struct iommu_cmd sync_cmd;
-		int ret;
+		/* Skip udelay() the first time around */
+		if (count++) {
+			if (count == LOOP_TIMEOUT) {
+				pr_err("AMD-Vi: Command buffer timeout\n");
+				return -EIO;
+			}
 
-		iommu->cmd_sem = 0;
+			udelay(1);
+		}
 
-		build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem);
-		copy_cmd_to_buffer(iommu, &sync_cmd, tail);
-
-		if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0)
-			return ret;
+		/* Update head and recheck remaining space */
+		iommu->cmd_buf_head = readl(iommu->mmio_base +
+					    MMIO_CMD_HEAD_OFFSET);
 
 		goto again;
 	}
 
-	copy_cmd_to_buffer(iommu, cmd, tail);
+	copy_cmd_to_buffer(iommu, cmd);
 
-	/* We need to sync now to make sure all commands are processed */
+	/* Do we need to make sure all commands are processed? */
 	iommu->need_sync = sync;
 
 	return 0;
@@ -1735,6 +1788,180 @@ static void free_gcr3_table(struct protection_domain *domain)
 	free_page((unsigned long)domain->gcr3_tbl);
 }
 
+static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct flush_queue *queue;
+
+		queue = per_cpu_ptr(dom->flush_queue, cpu);
+		kfree(queue->entries);
+	}
+
+	free_percpu(dom->flush_queue);
+
+	dom->flush_queue = NULL;
+}
+
+static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom)
+{
+	int cpu;
+
+	atomic64_set(&dom->flush_start_cnt,  0);
+	atomic64_set(&dom->flush_finish_cnt, 0);
+
+	dom->flush_queue = alloc_percpu(struct flush_queue);
+	if (!dom->flush_queue)
+		return -ENOMEM;
+
+	/* First make sure everything is cleared */
+	for_each_possible_cpu(cpu) {
+		struct flush_queue *queue;
+
+		queue = per_cpu_ptr(dom->flush_queue, cpu);
+		queue->head    = 0;
+		queue->tail    = 0;
+		queue->entries = NULL;
+	}
+
+	/* Now start doing the allocation */
+	for_each_possible_cpu(cpu) {
+		struct flush_queue *queue;
+
+		queue = per_cpu_ptr(dom->flush_queue, cpu);
+		queue->entries = kzalloc(FLUSH_QUEUE_SIZE * sizeof(*queue->entries),
+					 GFP_KERNEL);
+		if (!queue->entries) {
+			dma_ops_domain_free_flush_queue(dom);
+			return -ENOMEM;
+		}
+
+		spin_lock_init(&queue->lock);
+	}
+
+	return 0;
+}
+
+static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
+{
+	atomic64_inc(&dom->flush_start_cnt);
+	domain_flush_tlb(&dom->domain);
+	domain_flush_complete(&dom->domain);
+	atomic64_inc(&dom->flush_finish_cnt);
+}
+
+static inline bool queue_ring_full(struct flush_queue *queue)
+{
+	assert_spin_locked(&queue->lock);
+
+	return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head);
+}
+
+#define queue_ring_for_each(i, q) \
+	for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE)
+
+static inline unsigned queue_ring_add(struct flush_queue *queue)
+{
+	unsigned idx = queue->tail;
+
+	assert_spin_locked(&queue->lock);
+	queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE;
+
+	return idx;
+}
+
+static inline void queue_ring_remove_head(struct flush_queue *queue)
+{
+	assert_spin_locked(&queue->lock);
+	queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE;
+}
+
+static void queue_ring_free_flushed(struct dma_ops_domain *dom,
+				    struct flush_queue *queue)
+{
+	u64 counter = atomic64_read(&dom->flush_finish_cnt);
+	int idx;
+
+	queue_ring_for_each(idx, queue) {
+		/*
+		 * This assumes that counter values in the ring-buffer are
+		 * monotonically non-decreasing.
+		 */
+		if (queue->entries[idx].counter >= counter)
+			break;
+
+		free_iova_fast(&dom->iovad,
+			       queue->entries[idx].iova_pfn,
+			       queue->entries[idx].pages);
+
+		queue_ring_remove_head(queue);
+	}
+}
+
+static void queue_add(struct dma_ops_domain *dom,
+		      unsigned long address, unsigned long pages)
+{
+	struct flush_queue *queue;
+	unsigned long flags;
+	int idx;
+
+	pages     = __roundup_pow_of_two(pages);
+	address >>= PAGE_SHIFT;
+
+	queue = get_cpu_ptr(dom->flush_queue);
+	spin_lock_irqsave(&queue->lock, flags);
+
+	/*
+	 * First remove the entries from the ring-buffer that are already
+	 * flushed, to make the queue_ring_full() check below less likely to hit
+	 */
+	queue_ring_free_flushed(dom, queue);
+
+	/*
+	 * When ring-queue is full, flush the entries from the IOTLB so
+	 * that we can free all entries with queue_ring_free_flushed()
+	 * below.
+	 */
+	if (queue_ring_full(queue)) {
+		dma_ops_domain_flush_tlb(dom);
+		queue_ring_free_flushed(dom, queue);
+	}
+
+	idx = queue_ring_add(queue);
+
+	queue->entries[idx].iova_pfn = address;
+	queue->entries[idx].pages    = pages;
+	queue->entries[idx].counter  = atomic64_read(&dom->flush_start_cnt);
+
+	spin_unlock_irqrestore(&queue->lock, flags);
+
+	if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0)
+		mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10));
+
+	put_cpu_ptr(dom->flush_queue);
+}
+
+static void queue_flush_timeout(unsigned long data)
+{
+	struct dma_ops_domain *dom = (struct dma_ops_domain *)data;
+	int cpu;
+
+	atomic_set(&dom->flush_timer_on, 0);
+
+	dma_ops_domain_flush_tlb(dom);
+
+	for_each_possible_cpu(cpu) {
+		struct flush_queue *queue;
+		unsigned long flags;
+
+		queue = per_cpu_ptr(dom->flush_queue, cpu);
+		spin_lock_irqsave(&queue->lock, flags);
+		queue_ring_free_flushed(dom, queue);
+		spin_unlock_irqrestore(&queue->lock, flags);
+	}
+}
+
 /*
  * Free a domain, only used if something went wrong in the
  * allocation path and we need to free an already allocated page table
@@ -1746,6 +1973,11 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 
 	del_domain_from_list(&dom->domain);
 
+	if (timer_pending(&dom->flush_timer))
+		del_timer(&dom->flush_timer);
+
+	dma_ops_domain_free_flush_queue(dom);
+
 	put_iova_domain(&dom->iovad);
 
 	free_pagetable(&dom->domain);
@@ -1784,6 +2016,14 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
 	/* Initialize reserved ranges */
 	copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
 
+	if (dma_ops_domain_alloc_flush_queue(dma_dom))
+		goto free_dma_dom;
+
+	setup_timer(&dma_dom->flush_timer, queue_flush_timeout,
+		    (unsigned long)dma_dom);
+
+	atomic_set(&dma_dom->flush_timer_on, 0);
+
 	add_domain_to_list(&dma_dom->domain);
 
 	return dma_dom;
@@ -1846,7 +2086,8 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
 		flags    |= tmp;
 	}
 
-	flags &= ~(0xffffUL);
+
+	flags &= ~(DTE_FLAG_SA | 0xffffULL);
 	flags |= domain->id;
 
 	amd_iommu_dev_table[devid].data[1]  = flags;
@@ -2227,92 +2468,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
  *
  *****************************************************************************/
 
-static void __queue_flush(struct flush_queue *queue)
-{
-	struct protection_domain *domain;
-	unsigned long flags;
-	int idx;
-
-	/* First flush TLB of all known domains */
-	spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-	list_for_each_entry(domain, &amd_iommu_pd_list, list)
-		domain_flush_tlb(domain);
-	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-
-	/* Wait until flushes have completed */
-	domain_flush_complete(NULL);
-
-	for (idx = 0; idx < queue->next; ++idx) {
-		struct flush_queue_entry *entry;
-
-		entry = queue->entries + idx;
-
-		free_iova_fast(&entry->dma_dom->iovad,
-				entry->iova_pfn,
-				entry->pages);
-
-		/* Not really necessary, just to make sure we catch any bugs */
-		entry->dma_dom = NULL;
-	}
-
-	queue->next = 0;
-}
-
-static void queue_flush_all(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue;
-		unsigned long flags;
-
-		queue = per_cpu_ptr(&flush_queue, cpu);
-		spin_lock_irqsave(&queue->lock, flags);
-		if (queue->next > 0)
-			__queue_flush(queue);
-		spin_unlock_irqrestore(&queue->lock, flags);
-	}
-}
-
-static void queue_flush_timeout(unsigned long unsused)
-{
-	atomic_set(&queue_timer_on, 0);
-	queue_flush_all();
-}
-
-static void queue_add(struct dma_ops_domain *dma_dom,
-		      unsigned long address, unsigned long pages)
-{
-	struct flush_queue_entry *entry;
-	struct flush_queue *queue;
-	unsigned long flags;
-	int idx;
-
-	pages     = __roundup_pow_of_two(pages);
-	address >>= PAGE_SHIFT;
-
-	queue = get_cpu_ptr(&flush_queue);
-	spin_lock_irqsave(&queue->lock, flags);
-
-	if (queue->next == FLUSH_QUEUE_SIZE)
-		__queue_flush(queue);
-
-	idx   = queue->next++;
-	entry = queue->entries + idx;
-
-	entry->iova_pfn = address;
-	entry->pages    = pages;
-	entry->dma_dom  = dma_dom;
-
-	spin_unlock_irqrestore(&queue->lock, flags);
-
-	if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0)
-		mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10));
-
-	put_cpu_ptr(&flush_queue);
-}
-
-
 /*
  * In the dma_ops path we only have the struct device. This function
  * finds the corresponding IOMMU, the protection domain and the
@@ -2807,7 +2962,7 @@ static int init_reserved_iova_ranges(void)
 
 int __init amd_iommu_init_api(void)
 {
-	int ret, cpu, err = 0;
+	int ret, err = 0;
 
 	ret = iova_cache_get();
 	if (ret)
@@ -2817,18 +2972,6 @@ int __init amd_iommu_init_api(void)
 	if (ret)
 		return ret;
 
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
-
-		queue->entries = kzalloc(FLUSH_QUEUE_SIZE *
-					 sizeof(*queue->entries),
-					 GFP_KERNEL);
-		if (!queue->entries)
-			goto out_put_iova;
-
-		spin_lock_init(&queue->lock);
-	}
-
 	err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
 	if (err)
 		return err;
@@ -2840,23 +2983,12 @@ int __init amd_iommu_init_api(void)
 	err = bus_set_iommu(&platform_bus_type, &amd_iommu_ops);
 	if (err)
 		return err;
+
 	return 0;
-
-out_put_iova:
-	for_each_possible_cpu(cpu) {
-		struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
-
-		kfree(queue->entries);
-	}
-
-	return -ENOMEM;
 }
 
 int __init amd_iommu_init_dma_ops(void)
 {
-	setup_timer(&queue_timer, queue_flush_timeout, 0);
-	atomic_set(&queue_timer_on, 0);
-
 	swiotlb        = iommu_pass_through ? 1 : 0;
 	iommu_detected = 1;
 
@@ -3012,12 +3144,6 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
 
 	switch (dom->type) {
 	case IOMMU_DOMAIN_DMA:
-		/*
-		 * First make sure the domain is no longer referenced from the
-		 * flush queue
-		 */
-		queue_flush_all();
-
 		/* Now release the domain */
 		dma_dom = to_dma_ops_domain(domain);
 		dma_ops_domain_free(dma_dom);
@@ -4281,7 +4407,7 @@ static void irq_remapping_deactivate(struct irq_domain *domain,
 					    irte_info->index);
 }
 
-static struct irq_domain_ops amd_ir_domain_ops = {
+static const struct irq_domain_ops amd_ir_domain_ops = {
 	.alloc = irq_remapping_alloc,
 	.free = irq_remapping_free,
 	.activate = irq_remapping_activate,

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 5a11328..5cc597b 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c

@@ -29,6 +29,7 @@
 #include <linux/export.h>
 #include <linux/iommu.h>
 #include <linux/kmemleak.h>
+#include <linux/crash_dump.h>
 #include <asm/pci-direct.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
@@ -236,6 +237,7 @@ enum iommu_init_state {
 	IOMMU_INITIALIZED,
 	IOMMU_NOT_FOUND,
 	IOMMU_INIT_ERROR,
+	IOMMU_CMDLINE_DISABLED,
 };
 
 /* Early ioapic and hpet maps from kernel command line */
@@ -588,6 +590,8 @@ void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
 
 	writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
 	writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+	iommu->cmd_buf_head = 0;
+	iommu->cmd_buf_tail = 0;
 
 	iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
 }
@@ -1898,6 +1902,14 @@ static void init_device_table_dma(void)
 	for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
 		set_dev_entry_bit(devid, DEV_ENTRY_VALID);
 		set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
+		/*
+		 * In kdump kernels in-flight DMA from the old kernel might
+		 * cause IO_PAGE_FAULTs. There are no reports that a kdump
+		 * actually failed because of that, so just disable fault
+		 * reporting in the hardware to get rid of the messages
+		 */
+		if (is_kdump_kernel())
+			set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT);
 	}
 }
 
@@ -2097,23 +2109,27 @@ static struct syscore_ops amd_iommu_syscore_ops = {
 	.resume = amd_iommu_resume,
 };
 
-static void __init free_on_init_error(void)
+static void __init free_iommu_resources(void)
 {
 	kmemleak_free(irq_lookup_table);
 	free_pages((unsigned long)irq_lookup_table,
 		   get_order(rlookup_table_size));
+	irq_lookup_table = NULL;
 
 	kmem_cache_destroy(amd_iommu_irq_cache);
 	amd_iommu_irq_cache = NULL;
 
 	free_pages((unsigned long)amd_iommu_rlookup_table,
 		   get_order(rlookup_table_size));
+	amd_iommu_rlookup_table = NULL;
 
 	free_pages((unsigned long)amd_iommu_alias_table,
 		   get_order(alias_table_size));
+	amd_iommu_alias_table = NULL;
 
 	free_pages((unsigned long)amd_iommu_dev_table,
 		   get_order(dev_table_size));
+	amd_iommu_dev_table = NULL;
 
 	free_iommu_all();
 
@@ -2183,6 +2199,7 @@ static void __init free_dma_resources(void)
 {
 	free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
 		   get_order(MAX_DOMAIN_ID/8));
+	amd_iommu_pd_alloc_bitmap = NULL;
 
 	free_unity_maps();
 }
@@ -2307,6 +2324,9 @@ static int __init early_amd_iommu_init(void)
 	if (ret)
 		goto out;
 
+	/* Disable any previously enabled IOMMUs */
+	disable_iommus();
+
 	if (amd_iommu_irq_remap)
 		amd_iommu_irq_remap = check_ioapic_information();
 
@@ -2410,6 +2430,13 @@ static int __init state_next(void)
 	case IOMMU_IVRS_DETECTED:
 		ret = early_amd_iommu_init();
 		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
+		if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) {
+			pr_info("AMD-Vi: AMD IOMMU disabled on kernel command-line\n");
+			free_dma_resources();
+			free_iommu_resources();
+			init_state = IOMMU_CMDLINE_DISABLED;
+			ret = -EINVAL;
+		}
 		break;
 	case IOMMU_ACPI_FINISHED:
 		early_enable_iommus();
@@ -2438,6 +2465,7 @@ static int __init state_next(void)
 		break;
 	case IOMMU_NOT_FOUND:
 	case IOMMU_INIT_ERROR:
+	case IOMMU_CMDLINE_DISABLED:
 		/* Error states => do nothing */
 		ret = -EINVAL;
 		break;
@@ -2451,13 +2479,14 @@ static int __init state_next(void)
 
 static int __init iommu_go_to_state(enum iommu_init_state state)
 {
-	int ret = 0;
+	int ret = -EINVAL;
 
 	while (init_state != state) {
-		ret = state_next();
-		if (init_state == IOMMU_NOT_FOUND ||
-		    init_state == IOMMU_INIT_ERROR)
+		if (init_state == IOMMU_NOT_FOUND         ||
+		    init_state == IOMMU_INIT_ERROR        ||
+		    init_state == IOMMU_CMDLINE_DISABLED)
 			break;
+		ret = state_next();
 	}
 
 	return ret;
@@ -2522,7 +2551,7 @@ static int __init amd_iommu_init(void)
 		free_dma_resources();
 		if (!irq_remapping_enabled) {
 			disable_iommus();
-			free_on_init_error();
+			free_iommu_resources();
 		} else {
 			struct amd_iommu *iommu;
 
@@ -2549,9 +2578,6 @@ int __init amd_iommu_detect(void)
 	if (no_iommu || (iommu_detected && !gart_iommu_aperture))
 		return -ENODEV;
 
-	if (amd_iommu_disabled)
-		return -ENODEV;
-
 	ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
 	if (ret)
 		return ret;

diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 4de8f41..294a409 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h

@@ -322,6 +322,7 @@
 #define IOMMU_PTE_IW (1ULL << 62)
 
 #define DTE_FLAG_IOTLB	(1ULL << 32)
+#define DTE_FLAG_SA	(1ULL << 34)
 #define DTE_FLAG_GV	(1ULL << 55)
 #define DTE_FLAG_MASK	(0x3ffULL << 32)
 #define DTE_GLX_SHIFT	(56)
@@ -516,6 +517,8 @@ struct amd_iommu {
 
 	/* command buffer virtual address */
 	u8 *cmd_buf;
+	u32 cmd_buf_head;
+	u32 cmd_buf_tail;
 
 	/* event buffer virtual address */
 	u8 *evt_buf;

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 380969a..568c400 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c

@@ -408,10 +408,20 @@
 
 /* High-level queue structures */
 #define ARM_SMMU_POLL_TIMEOUT_US	100
+#define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US	1000000 /* 1s! */
 
 #define MSI_IOVA_BASE			0x8000000
 #define MSI_IOVA_LENGTH			0x100000
 
+/* Until ACPICA headers cover IORT rev. C */
+#ifndef ACPI_IORT_SMMU_HISILICON_HI161X
+#define ACPI_IORT_SMMU_HISILICON_HI161X		0x1
+#endif
+
+#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX
+#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX		0x2
+#endif
+
 static bool disable_bypass;
 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_bypass,
@@ -597,6 +607,7 @@ struct arm_smmu_device {
 	u32				features;
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
+#define ARM_SMMU_OPT_PAGE0_REGS_ONLY	(1 << 1)
 	u32				options;
 
 	struct arm_smmu_cmdq		cmdq;
@@ -604,6 +615,7 @@ struct arm_smmu_device {
 	struct arm_smmu_priq		priq;
 
 	int				gerr_irq;
+	int				combined_irq;
 
 	unsigned long			ias; /* IPA */
 	unsigned long			oas; /* PA */
@@ -645,7 +657,6 @@ struct arm_smmu_domain {
 	struct mutex			init_mutex; /* Protects smmu pointer */
 
 	struct io_pgtable_ops		*pgtbl_ops;
-	spinlock_t			pgtbl_lock;
 
 	enum arm_smmu_domain_stage	stage;
 	union {
@@ -663,9 +674,20 @@ struct arm_smmu_option_prop {
 
 static struct arm_smmu_option_prop arm_smmu_options[] = {
 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
+	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
 	{ 0, NULL},
 };
 
+static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
+						 struct arm_smmu_device *smmu)
+{
+	if ((offset > SZ_64K) &&
+	    (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
+		offset -= SZ_64K;
+
+	return smmu->base + offset;
+}
+
 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
 {
 	return container_of(dom, struct arm_smmu_domain, domain);
@@ -737,7 +759,13 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
  */
 static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
 {
-	ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
+	ktime_t timeout;
+	unsigned int delay = 1;
+
+	/* Wait longer when draining the queue */
+	timeout = ktime_add_us(ktime_get(), drain ?
+					    ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US :
+					    ARM_SMMU_POLL_TIMEOUT_US);
 
 	while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
 		if (ktime_compare(ktime_get(), timeout) > 0)
@@ -747,7 +775,8 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
 			wfe();
 		} else {
 			cpu_relax();
-			udelay(1);
+			udelay(delay);
+			delay *= 2;
 		}
 	}
 
@@ -1302,6 +1331,24 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
+{
+	struct arm_smmu_device *smmu = dev;
+
+	arm_smmu_evtq_thread(irq, dev);
+	if (smmu->features & ARM_SMMU_FEAT_PRI)
+		arm_smmu_priq_thread(irq, dev);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
+{
+	arm_smmu_gerror_handler(irq, dev);
+	arm_smmu_cmdq_sync_handler(irq, dev);
+	return IRQ_WAKE_THREAD;
+}
+
 /* IO_PGTABLE API */
 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
 {
@@ -1406,7 +1453,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 	}
 
 	mutex_init(&smmu_domain->init_mutex);
-	spin_lock_init(&smmu_domain->pgtbl_lock);
 	return &smmu_domain->domain;
 }
 
@@ -1555,6 +1601,9 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 		.iommu_dev	= smmu->dev,
 	};
 
+	if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
+		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
+
 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
 	if (!pgtbl_ops)
 		return -ENOMEM;
@@ -1675,44 +1724,29 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
 			phys_addr_t paddr, size_t size, int prot)
 {
-	int ret;
-	unsigned long flags;
-	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
 	if (!ops)
 		return -ENODEV;
 
-	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
-	ret = ops->map(ops, iova, paddr, size, prot);
-	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
-	return ret;
+	return ops->map(ops, iova, paddr, size, prot);
 }
 
 static size_t
 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
 {
-	size_t ret;
-	unsigned long flags;
-	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
 	if (!ops)
 		return 0;
 
-	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
-	ret = ops->unmap(ops, iova, size);
-	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
-	return ret;
+	return ops->unmap(ops, iova, size);
 }
 
 static phys_addr_t
 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 {
-	phys_addr_t ret;
-	unsigned long flags;
-	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
 		return iova;
@@ -1720,11 +1754,7 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 	if (!ops)
 		return 0;
 
-	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
-	ret = ops->iova_to_phys(ops, iova);
-	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
-
-	return ret;
+	return ops->iova_to_phys(ops, iova);
 }
 
 static struct platform_driver arm_smmu_driver;
@@ -1961,8 +1991,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
 		return -ENOMEM;
 	}
 
-	q->prod_reg	= smmu->base + prod_off;
-	q->cons_reg	= smmu->base + cons_off;
+	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
+	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
 	q->ent_dwords	= dwords;
 
 	q->q_base  = Q_BASE_RWA;
@@ -2218,18 +2248,9 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
 	devm_add_action(dev, arm_smmu_free_msis, dev);
 }
 
-static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
+static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
 {
-	int ret, irq;
-	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
-
-	/* Disable IRQs first */
-	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
-				      ARM_SMMU_IRQ_CTRLACK);
-	if (ret) {
-		dev_err(smmu->dev, "failed to disable irqs\n");
-		return ret;
-	}
+	int irq, ret;
 
 	arm_smmu_setup_msis(smmu);
 
@@ -2272,10 +2293,41 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
 			if (ret < 0)
 				dev_warn(smmu->dev,
 					 "failed to enable priq irq\n");
-			else
-				irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
 		}
 	}
+}
+
+static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
+{
+	int ret, irq;
+	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
+
+	/* Disable IRQs first */
+	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
+				      ARM_SMMU_IRQ_CTRLACK);
+	if (ret) {
+		dev_err(smmu->dev, "failed to disable irqs\n");
+		return ret;
+	}
+
+	irq = smmu->combined_irq;
+	if (irq) {
+		/*
+		 * Cavium ThunderX2 implementation does not support unique
+		 * irq lines. Use single irq line for all the SMMUv3 interrupts.
+		 */
+		ret = devm_request_threaded_irq(smmu->dev, irq,
+					arm_smmu_combined_irq_handler,
+					arm_smmu_combined_irq_thread,
+					IRQF_ONESHOT,
+					"arm-smmu-v3-combined-irq", smmu);
+		if (ret < 0)
+			dev_warn(smmu->dev, "failed to enable combined irq\n");
+	} else
+		arm_smmu_setup_unique_irqs(smmu);
+
+	if (smmu->features & ARM_SMMU_FEAT_PRI)
+		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
 
 	/* Enable interrupt generation on the SMMU */
 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
@@ -2363,8 +2415,10 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 
 	/* Event queue */
 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
-	writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD);
-	writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS);
+	writel_relaxed(smmu->evtq.q.prod,
+		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
+	writel_relaxed(smmu->evtq.q.cons,
+		       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
 
 	enables |= CR0_EVTQEN;
 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
@@ -2379,9 +2433,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 		writeq_relaxed(smmu->priq.q.q_base,
 			       smmu->base + ARM_SMMU_PRIQ_BASE);
 		writel_relaxed(smmu->priq.q.prod,
-			       smmu->base + ARM_SMMU_PRIQ_PROD);
+			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
 		writel_relaxed(smmu->priq.q.cons,
-			       smmu->base + ARM_SMMU_PRIQ_CONS);
+			       arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
 
 		enables |= CR0_PRIQEN;
 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
@@ -2605,6 +2659,20 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 }
 
 #ifdef CONFIG_ACPI
+static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
+{
+	switch (model) {
+	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
+		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
+		break;
+	case ACPI_IORT_SMMU_HISILICON_HI161X:
+		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
+		break;
+	}
+
+	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
+}
+
 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
 				      struct arm_smmu_device *smmu)
 {
@@ -2617,6 +2685,8 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
 	/* Retrieve SMMUv3 specific data */
 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
 
+	acpi_smmu_get_options(iort_smmu->model, smmu);
+
 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
 
@@ -2652,6 +2722,14 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev,
 	return ret;
 }
 
+static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
+{
+	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
+		return SZ_64K;
+	else
+		return SZ_128K;
+}
+
 static int arm_smmu_device_probe(struct platform_device *pdev)
 {
 	int irq, ret;
@@ -2668,35 +2746,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
 	}
 	smmu->dev = dev;
 
-	/* Base address */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (resource_size(res) + 1 < SZ_128K) {
-		dev_err(dev, "MMIO region too small (%pr)\n", res);
-		return -EINVAL;
-	}
-	ioaddr = res->start;
-
-	smmu->base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(smmu->base))
-		return PTR_ERR(smmu->base);
-
-	/* Interrupt lines */
-	irq = platform_get_irq_byname(pdev, "eventq");
-	if (irq > 0)
-		smmu->evtq.q.irq = irq;
-
-	irq = platform_get_irq_byname(pdev, "priq");
-	if (irq > 0)
-		smmu->priq.q.irq = irq;
-
-	irq = platform_get_irq_byname(pdev, "cmdq-sync");
-	if (irq > 0)
-		smmu->cmdq.q.irq = irq;
-
-	irq = platform_get_irq_byname(pdev, "gerror");
-	if (irq > 0)
-		smmu->gerr_irq = irq;
-
 	if (dev->of_node) {
 		ret = arm_smmu_device_dt_probe(pdev, smmu);
 	} else {
@@ -2708,6 +2757,40 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
 	/* Set bypass mode according to firmware probing result */
 	bypass = !!ret;
 
+	/* Base address */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
+		dev_err(dev, "MMIO region too small (%pr)\n", res);
+		return -EINVAL;
+	}
+	ioaddr = res->start;
+
+	smmu->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(smmu->base))
+		return PTR_ERR(smmu->base);
+
+	/* Interrupt lines */
+
+	irq = platform_get_irq_byname(pdev, "combined");
+	if (irq > 0)
+		smmu->combined_irq = irq;
+	else {
+		irq = platform_get_irq_byname(pdev, "eventq");
+		if (irq > 0)
+			smmu->evtq.q.irq = irq;
+
+		irq = platform_get_irq_byname(pdev, "priq");
+		if (irq > 0)
+			smmu->priq.q.irq = irq;
+
+		irq = platform_get_irq_byname(pdev, "cmdq-sync");
+		if (irq > 0)
+			smmu->cmdq.q.irq = irq;
+
+		irq = platform_get_irq_byname(pdev, "gerror");
+		if (irq > 0)
+			smmu->gerr_irq = irq;
+	}
 	/* Probe the h/w */
 	ret = arm_smmu_device_hw_probe(smmu);
 	if (ret)
@@ -2736,6 +2819,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
 	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
 
 	ret = iommu_device_register(&smmu->iommu);
+	if (ret) {
+		dev_err(dev, "Failed to register iommu\n");
+		return ret;
+	}
 
 #ifdef CONFIG_PCI
 	if (pci_bus_type.iommu_ops != &arm_smmu_ops) {
@@ -2768,7 +2855,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static struct of_device_id arm_smmu_of_match[] = {
+static const struct of_device_id arm_smmu_of_match[] = {
 	{ .compatible = "arm,smmu-v3", },
 	{ },
 };

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 7ec30b0..bc89b4d 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c

@@ -312,6 +312,14 @@ enum arm_smmu_implementation {
 	CAVIUM_SMMUV2,
 };
 
+/* Until ACPICA headers cover IORT rev. C */
+#ifndef ACPI_IORT_SMMU_CORELINK_MMU401
+#define ACPI_IORT_SMMU_CORELINK_MMU401	0x4
+#endif
+#ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX
+#define ACPI_IORT_SMMU_CAVIUM_THUNDERX	0x5
+#endif
+
 struct arm_smmu_s2cr {
 	struct iommu_group		*group;
 	int				count;
@@ -425,10 +433,10 @@ enum arm_smmu_domain_stage {
 struct arm_smmu_domain {
 	struct arm_smmu_device		*smmu;
 	struct io_pgtable_ops		*pgtbl_ops;
-	spinlock_t			pgtbl_lock;
 	struct arm_smmu_cfg		cfg;
 	enum arm_smmu_domain_stage	stage;
 	struct mutex			init_mutex; /* Protects smmu pointer */
+	spinlock_t			cb_lock; /* Serialises ATS1* ops */
 	struct iommu_domain		domain;
 };
 
@@ -1010,6 +1018,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 		.iommu_dev	= smmu->dev,
 	};
 
+	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
+
 	smmu_domain->smmu = smmu;
 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
 	if (!pgtbl_ops) {
@@ -1102,7 +1113,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 	}
 
 	mutex_init(&smmu_domain->init_mutex);
-	spin_lock_init(&smmu_domain->pgtbl_lock);
+	spin_lock_init(&smmu_domain->cb_lock);
 
 	return &smmu_domain->domain;
 }
@@ -1380,35 +1391,23 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
 			phys_addr_t paddr, size_t size, int prot)
 {
-	int ret;
-	unsigned long flags;
-	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-	struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
+	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
 	if (!ops)
 		return -ENODEV;
 
-	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
-	ret = ops->map(ops, iova, paddr, size, prot);
-	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
-	return ret;
+	return ops->map(ops, iova, paddr, size, prot);
 }
 
 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 			     size_t size)
 {
-	size_t ret;
-	unsigned long flags;
-	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-	struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
+	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 
 	if (!ops)
 		return 0;
 
-	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
-	ret = ops->unmap(ops, iova, size);
-	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
-	return ret;
+	return ops->unmap(ops, iova, size);
 }
 
 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
@@ -1422,10 +1421,11 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 	void __iomem *cb_base;
 	u32 tmp;
 	u64 phys;
-	unsigned long va;
+	unsigned long va, flags;
 
 	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
 
+	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
 	/* ATS1 registers can only be written atomically */
 	va = iova & ~0xfffUL;
 	if (smmu->version == ARM_SMMU_V2)
@@ -1435,6 +1435,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 
 	if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
 				      !(tmp & ATSR_ACTIVE), 5, 50)) {
+		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
 		dev_err(dev,
 			"iova to phys timed out on %pad. Falling back to software table walk.\n",
 			&iova);
@@ -1442,6 +1443,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 	}
 
 	phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
+	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
 	if (phys & CB_PAR_F) {
 		dev_err(dev, "translation fault!\n");
 		dev_err(dev, "PAR = 0x%llx\n", phys);
@@ -1454,10 +1456,8 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
 					dma_addr_t iova)
 {
-	phys_addr_t ret;
-	unsigned long flags;
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-	struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
+	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
 
 	if (domain->type == IOMMU_DOMAIN_IDENTITY)
 		return iova;
@@ -1465,17 +1465,11 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
 	if (!ops)
 		return 0;
 
-	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
 	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
-			smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
-		ret = arm_smmu_iova_to_phys_hard(domain, iova);
-	} else {
-		ret = ops->iova_to_phys(ops, iova);
-	}
+			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
+		return arm_smmu_iova_to_phys_hard(domain, iova);
 
-	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
-
-	return ret;
+	return ops->iova_to_phys(ops, iova);
 }
 
 static bool arm_smmu_capable(enum iommu_cap cap)
@@ -2073,6 +2067,10 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
 		smmu->version = ARM_SMMU_V1;
 		smmu->model = GENERIC_SMMU;
 		break;
+	case ACPI_IORT_SMMU_CORELINK_MMU401:
+		smmu->version = ARM_SMMU_V1_64K;
+		smmu->model = GENERIC_SMMU;
+		break;
 	case ACPI_IORT_SMMU_V2:
 		smmu->version = ARM_SMMU_V2;
 		smmu->model = GENERIC_SMMU;
@@ -2081,6 +2079,10 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
 		smmu->version = ARM_SMMU_V2;
 		smmu->model = ARM_MMU500;
 		break;
+	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
+		smmu->version = ARM_SMMU_V2;
+		smmu->model = CAVIUM_SMMUV2;
+		break;
 	default:
 		ret = -ENODEV;
 	}

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 9403336..9d1cebe 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c

@@ -316,7 +316,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 		 * If we have devices with different DMA masks, move the free
 		 * area cache limit down for the benefit of the smaller one.
 		 */
-		iovad->dma_32bit_pfn = min(end_pfn, iovad->dma_32bit_pfn);
+		iovad->dma_32bit_pfn = min(end_pfn + 1, iovad->dma_32bit_pfn);
 
 		return 0;
 	}

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 1e95475..687f18f 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c

@@ -481,7 +481,7 @@ struct deferred_flush_data {
 	struct deferred_flush_table *tables;
 };
 
-DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
+static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
 
 /* bitmap for indexing intel_iommus */
 static int g_num_of_iommus;
@@ -2390,7 +2390,7 @@ static struct dmar_domain *find_domain(struct device *dev)
 
 	/* No lock here, assumes no domain exit in normal case */
 	info = dev->archdata.iommu;
-	if (info)
+	if (likely(info))
 		return info->domain;
 	return NULL;
 }
@@ -3478,7 +3478,7 @@ static unsigned long intel_alloc_iova(struct device *dev,
 	return iova_pfn;
 }
 
-static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
+static struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
 {
 	struct dmar_domain *domain, *tmp;
 	struct dmar_rmrr_unit *rmrr;
@@ -3525,18 +3525,6 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
 	return domain;
 }
 
-static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
-{
-	struct device_domain_info *info;
-
-	/* No lock here, assumes no domain exit in normal case */
-	info = dev->archdata.iommu;
-	if (likely(info))
-		return info->domain;
-
-	return __get_valid_domain_for_dev(dev);
-}
-
 /* Check if the dev needs to go through non-identity map and unmap process.*/
 static int iommu_no_mapping(struct device *dev)
 {
@@ -3725,10 +3713,8 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
 	struct intel_iommu *iommu;
 	struct deferred_flush_entry *entry;
 	struct deferred_flush_data *flush_data;
-	unsigned int cpuid;
 
-	cpuid = get_cpu();
-	flush_data = per_cpu_ptr(&deferred_flush, cpuid);
+	flush_data = raw_cpu_ptr(&deferred_flush);
 
 	/* Flush all CPUs' entries to avoid deferring too much.  If
 	 * this becomes a bottleneck, can just flush us, and rely on
@@ -3761,8 +3747,6 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
 	}
 	flush_data->size++;
 	spin_unlock_irqrestore(&flush_data->lock, flags);
-
-	put_cpu();
 }
 
 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
@@ -3973,7 +3957,7 @@ static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
 	return !dma_addr;
 }
 
-struct dma_map_ops intel_dma_ops = {
+const struct dma_map_ops intel_dma_ops = {
 	.alloc = intel_alloc_coherent,
 	.free = intel_free_coherent,
 	.map_sg = intel_map_sg,

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 23c4276..f167c0d 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c

@@ -489,6 +489,36 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 }
 EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
 
+int intel_svm_is_pasid_valid(struct device *dev, int pasid)
+{
+	struct intel_iommu *iommu;
+	struct intel_svm *svm;
+	int ret = -EINVAL;
+
+	mutex_lock(&pasid_mutex);
+	iommu = intel_svm_device_to_iommu(dev);
+	if (!iommu || !iommu->pasid_table)
+		goto out;
+
+	svm = idr_find(&iommu->pasid_idr, pasid);
+	if (!svm)
+		goto out;
+
+	/* init_mm is used in this case */
+	if (!svm->mm)
+		ret = 1;
+	else if (atomic_read(&svm->mm->mm_users) > 0)
+		ret = 1;
+	else
+		ret = 0;
+
+ out:
+	mutex_unlock(&pasid_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid);
+
 /* Page request queue descriptor */
 struct page_req_dsc {
 	u64 srr:1;

diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 8fc641e..a5b89f6 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c

@@ -76,7 +76,7 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS];
  * the dmar_global_lock.
  */
 static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
-static struct irq_domain_ops intel_ir_domain_ops;
+static const struct irq_domain_ops intel_ir_domain_ops;
 
 static void iommu_disable_irq_remapping(struct intel_iommu *iommu);
 static int __init parse_ioapics_under_ir(void);
@@ -1407,7 +1407,7 @@ static void intel_irq_remapping_deactivate(struct irq_domain *domain,
 	modify_irte(&data->irq_2_iommu, &entry);
 }
 
-static struct irq_domain_ops intel_ir_domain_ops = {
+static const struct irq_domain_ops intel_ir_domain_ops = {
 	.alloc = intel_irq_remapping_alloc,
 	.free = intel_irq_remapping_free,
 	.activate = intel_irq_remapping_activate,

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 8d6ca28..af330f5 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c

@@ -32,6 +32,7 @@
 
 #define pr_fmt(fmt)	"arm-v7s io-pgtable: " fmt
 
+#include <linux/atomic.h>
 #include <linux/dma-mapping.h>
 #include <linux/gfp.h>
 #include <linux/iommu.h>
@@ -39,6 +40,7 @@
 #include <linux/kmemleak.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
+#include <linux/spinlock.h>
 #include <linux/types.h>
 
 #include <asm/barrier.h>
@@ -92,7 +94,8 @@
 #define ARM_V7S_PTE_TYPE_CONT_PAGE	0x1
 
 #define ARM_V7S_PTE_IS_VALID(pte)	(((pte) & 0x3) != 0)
-#define ARM_V7S_PTE_IS_TABLE(pte, lvl)	(lvl == 1 && ((pte) & ARM_V7S_PTE_TYPE_TABLE))
+#define ARM_V7S_PTE_IS_TABLE(pte, lvl) \
+	((lvl) == 1 && (((pte) & 0x3) == ARM_V7S_PTE_TYPE_TABLE))
 
 /* Page table bits */
 #define ARM_V7S_ATTR_XN(lvl)		BIT(4 * (2 - (lvl)))
@@ -167,6 +170,7 @@ struct arm_v7s_io_pgtable {
 
 	arm_v7s_iopte		*pgd;
 	struct kmem_cache	*l2_tables;
+	spinlock_t		split_lock;
 };
 
 static dma_addr_t __arm_v7s_dma_addr(void *pages)
@@ -186,7 +190,8 @@ static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl)
 static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
 				   struct arm_v7s_io_pgtable *data)
 {
-	struct device *dev = data->iop.cfg.iommu_dev;
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
+	struct device *dev = cfg->iommu_dev;
 	dma_addr_t dma;
 	size_t size = ARM_V7S_TABLE_SIZE(lvl);
 	void *table = NULL;
@@ -195,7 +200,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
 		table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size));
 	else if (lvl == 2)
 		table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA);
-	if (table && !selftest_running) {
+	if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
 		dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
 		if (dma_mapping_error(dev, dma))
 			goto out_free;
@@ -224,10 +229,11 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
 static void __arm_v7s_free_table(void *table, int lvl,
 				 struct arm_v7s_io_pgtable *data)
 {
-	struct device *dev = data->iop.cfg.iommu_dev;
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
+	struct device *dev = cfg->iommu_dev;
 	size_t size = ARM_V7S_TABLE_SIZE(lvl);
 
-	if (!selftest_running)
+	if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
 		dma_unmap_single(dev, __arm_v7s_dma_addr(table), size,
 				 DMA_TO_DEVICE);
 	if (lvl == 1)
@@ -239,7 +245,7 @@ static void __arm_v7s_free_table(void *table, int lvl,
 static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries,
 			       struct io_pgtable_cfg *cfg)
 {
-	if (selftest_running)
+	if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)
 		return;
 
 	dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep),
@@ -280,6 +286,13 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl,
 	else if (prot & IOMMU_CACHE)
 		pte |= ARM_V7S_ATTR_B | ARM_V7S_ATTR_C;
 
+	pte |= ARM_V7S_PTE_TYPE_PAGE;
+	if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
+		pte |= ARM_V7S_ATTR_NS_SECTION;
+
+	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)
+		pte |= ARM_V7S_ATTR_MTK_4GB;
+
 	return pte;
 }
 
@@ -352,7 +365,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
 			    int lvl, int num_entries, arm_v7s_iopte *ptep)
 {
 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
-	arm_v7s_iopte pte = arm_v7s_prot_to_pte(prot, lvl, cfg);
+	arm_v7s_iopte pte;
 	int i;
 
 	for (i = 0; i < num_entries; i++)
@@ -374,13 +387,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
 			return -EEXIST;
 		}
 
-	pte |= ARM_V7S_PTE_TYPE_PAGE;
-	if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
-		pte |= ARM_V7S_ATTR_NS_SECTION;
-
-	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)
-		pte |= ARM_V7S_ATTR_MTK_4GB;
-
+	pte = arm_v7s_prot_to_pte(prot, lvl, cfg);
 	if (num_entries > 1)
 		pte = arm_v7s_pte_to_cont(pte, lvl);
 
@@ -390,6 +397,30 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
 	return 0;
 }
 
+static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table,
+					   arm_v7s_iopte *ptep,
+					   arm_v7s_iopte curr,
+					   struct io_pgtable_cfg *cfg)
+{
+	arm_v7s_iopte old, new;
+
+	new = virt_to_phys(table) | ARM_V7S_PTE_TYPE_TABLE;
+	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
+		new |= ARM_V7S_ATTR_NS_TABLE;
+
+	/*
+	 * Ensure the table itself is visible before its PTE can be.
+	 * Whilst we could get away with cmpxchg_release below, this
+	 * doesn't have any ordering semantics when !CONFIG_SMP.
+	 */
+	dma_wmb();
+
+	old = cmpxchg_relaxed(ptep, curr, new);
+	__arm_v7s_pte_sync(ptep, 1, cfg);
+
+	return old;
+}
+
 static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
 			 phys_addr_t paddr, size_t size, int prot,
 			 int lvl, arm_v7s_iopte *ptep)
@@ -411,20 +442,23 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
 		return -EINVAL;
 
 	/* Grab a pointer to the next level */
-	pte = *ptep;
+	pte = READ_ONCE(*ptep);
 	if (!pte) {
 		cptep = __arm_v7s_alloc_table(lvl + 1, GFP_ATOMIC, data);
 		if (!cptep)
 			return -ENOMEM;
 
-		pte = virt_to_phys(cptep) | ARM_V7S_PTE_TYPE_TABLE;
-		if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
-			pte |= ARM_V7S_ATTR_NS_TABLE;
-
-		__arm_v7s_set_pte(ptep, pte, 1, cfg);
-	} else if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
-		cptep = iopte_deref(pte, lvl);
+		pte = arm_v7s_install_table(cptep, ptep, 0, cfg);
+		if (pte)
+			__arm_v7s_free_table(cptep, lvl + 1, data);
 	} else {
+		/* We've no easy way of knowing if it's synced yet, so... */
+		__arm_v7s_pte_sync(ptep, 1, cfg);
+	}
+
+	if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
+		cptep = iopte_deref(pte, lvl);
+	} else if (pte) {
 		/* We require an unmap first */
 		WARN_ON(!selftest_running);
 		return -EEXIST;
@@ -477,66 +511,73 @@ static void arm_v7s_free_pgtable(struct io_pgtable *iop)
 	kfree(data);
 }
 
-static void arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
-			       unsigned long iova, int idx, int lvl,
-			       arm_v7s_iopte *ptep)
+static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
+					unsigned long iova, int idx, int lvl,
+					arm_v7s_iopte *ptep)
 {
 	struct io_pgtable *iop = &data->iop;
 	arm_v7s_iopte pte;
 	size_t size = ARM_V7S_BLOCK_SIZE(lvl);
 	int i;
 
+	/* Check that we didn't lose a race to get the lock */
+	pte = *ptep;
+	if (!arm_v7s_pte_is_cont(pte, lvl))
+		return pte;
+
 	ptep -= idx & (ARM_V7S_CONT_PAGES - 1);
-	pte = arm_v7s_cont_to_pte(*ptep, lvl);
-	for (i = 0; i < ARM_V7S_CONT_PAGES; i++) {
-		ptep[i] = pte;
-		pte += size;
-	}
+	pte = arm_v7s_cont_to_pte(pte, lvl);
+	for (i = 0; i < ARM_V7S_CONT_PAGES; i++)
+		ptep[i] = pte + i * size;
 
 	__arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg);
 
 	size *= ARM_V7S_CONT_PAGES;
 	io_pgtable_tlb_add_flush(iop, iova, size, size, true);
 	io_pgtable_tlb_sync(iop);
+	return pte;
 }
 
 static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
 				   unsigned long iova, size_t size,
-				   arm_v7s_iopte *ptep)
+				   arm_v7s_iopte blk_pte, arm_v7s_iopte *ptep)
 {
-	unsigned long blk_start, blk_end, blk_size;
-	phys_addr_t blk_paddr;
-	arm_v7s_iopte table = 0;
-	int prot = arm_v7s_pte_to_prot(*ptep, 1);
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
+	arm_v7s_iopte pte, *tablep;
+	int i, unmap_idx, num_entries, num_ptes;
 
-	blk_size = ARM_V7S_BLOCK_SIZE(1);
-	blk_start = iova & ARM_V7S_LVL_MASK(1);
-	blk_end = blk_start + ARM_V7S_BLOCK_SIZE(1);
-	blk_paddr = *ptep & ARM_V7S_LVL_MASK(1);
+	tablep = __arm_v7s_alloc_table(2, GFP_ATOMIC, data);
+	if (!tablep)
+		return 0; /* Bytes unmapped */
 
-	for (; blk_start < blk_end; blk_start += size, blk_paddr += size) {
-		arm_v7s_iopte *tablep;
+	num_ptes = ARM_V7S_PTES_PER_LVL(2);
+	num_entries = size >> ARM_V7S_LVL_SHIFT(2);
+	unmap_idx = ARM_V7S_LVL_IDX(iova, 2);
 
+	pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg);
+	if (num_entries > 1)
+		pte = arm_v7s_pte_to_cont(pte, 2);
+
+	for (i = 0; i < num_ptes; i += num_entries, pte += size) {
 		/* Unmap! */
-		if (blk_start == iova)
+		if (i == unmap_idx)
 			continue;
 
-		/* __arm_v7s_map expects a pointer to the start of the table */
-		tablep = &table - ARM_V7S_LVL_IDX(blk_start, 1);
-		if (__arm_v7s_map(data, blk_start, blk_paddr, size, prot, 1,
-				  tablep) < 0) {
-			if (table) {
-				/* Free the table we allocated */
-				tablep = iopte_deref(table, 1);
-				__arm_v7s_free_table(tablep, 2, data);
-			}
-			return 0; /* Bytes unmapped */
-		}
+		__arm_v7s_set_pte(&tablep[i], pte, num_entries, cfg);
 	}
 
-	__arm_v7s_set_pte(ptep, table, 1, &data->iop.cfg);
-	iova &= ~(blk_size - 1);
-	io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true);
+	pte = arm_v7s_install_table(tablep, ptep, blk_pte, cfg);
+	if (pte != blk_pte) {
+		__arm_v7s_free_table(tablep, 2, data);
+
+		if (!ARM_V7S_PTE_IS_TABLE(pte, 1))
+			return 0;
+
+		tablep = iopte_deref(pte, 1);
+		return __arm_v7s_unmap(data, iova, size, 2, tablep);
+	}
+
+	io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
 	return size;
 }
 
@@ -555,17 +596,28 @@ static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
 	idx = ARM_V7S_LVL_IDX(iova, lvl);
 	ptep += idx;
 	do {
-		if (WARN_ON(!ARM_V7S_PTE_IS_VALID(ptep[i])))
+		pte[i] = READ_ONCE(ptep[i]);
+		if (WARN_ON(!ARM_V7S_PTE_IS_VALID(pte[i])))
 			return 0;
-		pte[i] = ptep[i];
 	} while (++i < num_entries);
 
 	/*
 	 * If we've hit a contiguous 'large page' entry at this level, it
 	 * needs splitting first, unless we're unmapping the whole lot.
+	 *
+	 * For splitting, we can't rewrite 16 PTEs atomically, and since we
+	 * can't necessarily assume TEX remap we don't have a software bit to
+	 * mark live entries being split. In practice (i.e. DMA API code), we
+	 * will never be splitting large pages anyway, so just wrap this edge
+	 * case in a lock for the sake of correctness and be done with it.
 	 */
-	if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl))
-		arm_v7s_split_cont(data, iova, idx, lvl, ptep);
+	if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&data->split_lock, flags);
+		pte[0] = arm_v7s_split_cont(data, iova, idx, lvl, ptep);
+		spin_unlock_irqrestore(&data->split_lock, flags);
+	}
 
 	/* If the size matches this level, we're in the right place */
 	if (num_entries) {
@@ -593,7 +645,7 @@ static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
 		 * Insert a table at the next level to map the old region,
 		 * minus the part we want to unmap
 		 */
-		return arm_v7s_split_blk_unmap(data, iova, size, ptep);
+		return arm_v7s_split_blk_unmap(data, iova, size, pte[0], ptep);
 	}
 
 	/* Keep on walkin' */
@@ -623,7 +675,8 @@ static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
 	u32 mask;
 
 	do {
-		pte = ptep[ARM_V7S_LVL_IDX(iova, ++lvl)];
+		ptep += ARM_V7S_LVL_IDX(iova, ++lvl);
+		pte = READ_ONCE(*ptep);
 		ptep = iopte_deref(pte, lvl);
 	} while (ARM_V7S_PTE_IS_TABLE(pte, lvl));
 
@@ -651,7 +704,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
 			    IO_PGTABLE_QUIRK_NO_PERMS |
 			    IO_PGTABLE_QUIRK_TLBI_ON_MAP |
-			    IO_PGTABLE_QUIRK_ARM_MTK_4GB))
+			    IO_PGTABLE_QUIRK_ARM_MTK_4GB |
+			    IO_PGTABLE_QUIRK_NO_DMA))
 		return NULL;
 
 	/* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
@@ -663,6 +717,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 	if (!data)
 		return NULL;
 
+	spin_lock_init(&data->split_lock);
 	data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2",
 					    ARM_V7S_TABLE_SIZE(2),
 					    ARM_V7S_TABLE_SIZE(2),
@@ -749,7 +804,7 @@ static void dummy_tlb_sync(void *cookie)
 	WARN_ON(cookie != cfg_cookie);
 }
 
-static struct iommu_gather_ops dummy_tlb_ops = {
+static const struct iommu_gather_ops dummy_tlb_ops = {
 	.tlb_flush_all	= dummy_tlb_flush_all,
 	.tlb_add_flush	= dummy_tlb_add_flush,
 	.tlb_sync	= dummy_tlb_sync,
@@ -768,7 +823,7 @@ static int __init arm_v7s_do_selftests(void)
 		.tlb = &dummy_tlb_ops,
 		.oas = 32,
 		.ias = 32,
-		.quirks = IO_PGTABLE_QUIRK_ARM_NS,
+		.quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA,
 		.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
 	};
 	unsigned int iova, size, iova_start;

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 6e5df5e..b182039 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c

@@ -20,6 +20,7 @@
 
 #define pr_fmt(fmt)	"arm-lpae io-pgtable: " fmt
 
+#include <linux/atomic.h>
 #include <linux/iommu.h>
 #include <linux/kernel.h>
 #include <linux/sizes.h>
@@ -99,6 +100,8 @@
 #define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)6) << 52)
 #define ARM_LPAE_PTE_ATTR_MASK		(ARM_LPAE_PTE_ATTR_LO_MASK |	\
 					 ARM_LPAE_PTE_ATTR_HI_MASK)
+/* Software bit for solving coherency races */
+#define ARM_LPAE_PTE_SW_SYNC		(((arm_lpae_iopte)1) << 55)
 
 /* Stage-1 PTE */
 #define ARM_LPAE_PTE_AP_UNPRIV		(((arm_lpae_iopte)1) << 6)
@@ -217,7 +220,7 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
 	if (!pages)
 		return NULL;
 
-	if (!selftest_running) {
+	if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
 		dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
 		if (dma_mapping_error(dev, dma))
 			goto out_free;
@@ -243,40 +246,64 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
 static void __arm_lpae_free_pages(void *pages, size_t size,
 				  struct io_pgtable_cfg *cfg)
 {
-	if (!selftest_running)
+	if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
 		dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
 				 size, DMA_TO_DEVICE);
 	free_pages_exact(pages, size);
 }
 
+static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep,
+				struct io_pgtable_cfg *cfg)
+{
+	dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
+				   sizeof(*ptep), DMA_TO_DEVICE);
+}
+
 static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
 			       struct io_pgtable_cfg *cfg)
 {
 	*ptep = pte;
 
-	if (!selftest_running)
-		dma_sync_single_for_device(cfg->iommu_dev,
-					   __arm_lpae_dma_addr(ptep),
-					   sizeof(pte), DMA_TO_DEVICE);
+	if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
+		__arm_lpae_sync_pte(ptep, cfg);
 }
 
 static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 			    unsigned long iova, size_t size, int lvl,
 			    arm_lpae_iopte *ptep);
 
+static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
+				phys_addr_t paddr, arm_lpae_iopte prot,
+				int lvl, arm_lpae_iopte *ptep)
+{
+	arm_lpae_iopte pte = prot;
+
+	if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
+		pte |= ARM_LPAE_PTE_NS;
+
+	if (lvl == ARM_LPAE_MAX_LEVELS - 1)
+		pte |= ARM_LPAE_PTE_TYPE_PAGE;
+	else
+		pte |= ARM_LPAE_PTE_TYPE_BLOCK;
+
+	pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
+	pte |= pfn_to_iopte(paddr >> data->pg_shift, data);
+
+	__arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
+}
+
 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 			     unsigned long iova, phys_addr_t paddr,
 			     arm_lpae_iopte prot, int lvl,
 			     arm_lpae_iopte *ptep)
 {
-	arm_lpae_iopte pte = prot;
-	struct io_pgtable_cfg *cfg = &data->iop.cfg;
+	arm_lpae_iopte pte = *ptep;
 
-	if (iopte_leaf(*ptep, lvl)) {
+	if (iopte_leaf(pte, lvl)) {
 		/* We require an unmap first */
 		WARN_ON(!selftest_running);
 		return -EEXIST;
-	} else if (iopte_type(*ptep, lvl) == ARM_LPAE_PTE_TYPE_TABLE) {
+	} else if (iopte_type(pte, lvl) == ARM_LPAE_PTE_TYPE_TABLE) {
 		/*
 		 * We need to unmap and free the old table before
 		 * overwriting it with a block entry.
@@ -289,27 +316,49 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 			return -EINVAL;
 	}
 
-	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
-		pte |= ARM_LPAE_PTE_NS;
-
-	if (lvl == ARM_LPAE_MAX_LEVELS - 1)
-		pte |= ARM_LPAE_PTE_TYPE_PAGE;
-	else
-		pte |= ARM_LPAE_PTE_TYPE_BLOCK;
-
-	pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
-	pte |= pfn_to_iopte(paddr >> data->pg_shift, data);
-
-	__arm_lpae_set_pte(ptep, pte, cfg);
+	__arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
 	return 0;
 }
 
+static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
+					     arm_lpae_iopte *ptep,
+					     arm_lpae_iopte curr,
+					     struct io_pgtable_cfg *cfg)
+{
+	arm_lpae_iopte old, new;
+
+	new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE;
+	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
+		new |= ARM_LPAE_PTE_NSTABLE;
+
+	/*
+	 * Ensure the table itself is visible before its PTE can be.
+	 * Whilst we could get away with cmpxchg64_release below, this
+	 * doesn't have any ordering semantics when !CONFIG_SMP.
+	 */
+	dma_wmb();
+
+	old = cmpxchg64_relaxed(ptep, curr, new);
+
+	if ((cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) ||
+	    (old & ARM_LPAE_PTE_SW_SYNC))
+		return old;
+
+	/* Even if it's not ours, there's no point waiting; just kick it */
+	__arm_lpae_sync_pte(ptep, cfg);
+	if (old == curr)
+		WRITE_ONCE(*ptep, new | ARM_LPAE_PTE_SW_SYNC);
+
+	return old;
+}
+
 static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 			  phys_addr_t paddr, size_t size, arm_lpae_iopte prot,
 			  int lvl, arm_lpae_iopte *ptep)
 {
 	arm_lpae_iopte *cptep, pte;
 	size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
+	size_t tblsz = ARM_LPAE_GRANULE(data);
 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
 
 	/* Find our entry at the current level */
@@ -324,20 +373,23 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 		return -EINVAL;
 
 	/* Grab a pointer to the next level */
-	pte = *ptep;
+	pte = READ_ONCE(*ptep);
 	if (!pte) {
-		cptep = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data),
-					       GFP_ATOMIC, cfg);
+		cptep = __arm_lpae_alloc_pages(tblsz, GFP_ATOMIC, cfg);
 		if (!cptep)
 			return -ENOMEM;
 
-		pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE;
-		if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
-			pte |= ARM_LPAE_PTE_NSTABLE;
-		__arm_lpae_set_pte(ptep, pte, cfg);
-	} else if (!iopte_leaf(pte, lvl)) {
+		pte = arm_lpae_install_table(cptep, ptep, 0, cfg);
+		if (pte)
+			__arm_lpae_free_pages(cptep, tblsz, cfg);
+	} else if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) &&
+		   !(pte & ARM_LPAE_PTE_SW_SYNC)) {
+		__arm_lpae_sync_pte(ptep, cfg);
+	}
+
+	if (pte && !iopte_leaf(pte, lvl)) {
 		cptep = iopte_deref(pte, data);
-	} else {
+	} else if (pte) {
 		/* We require an unmap first */
 		WARN_ON(!selftest_running);
 		return -EEXIST;
@@ -452,40 +504,55 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
 
 static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 				    unsigned long iova, size_t size,
-				    arm_lpae_iopte prot, int lvl,
-				    arm_lpae_iopte *ptep, size_t blk_size)
+				    arm_lpae_iopte blk_pte, int lvl,
+				    arm_lpae_iopte *ptep)
 {
-	unsigned long blk_start, blk_end;
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
+	arm_lpae_iopte pte, *tablep;
 	phys_addr_t blk_paddr;
-	arm_lpae_iopte table = 0;
+	size_t tablesz = ARM_LPAE_GRANULE(data);
+	size_t split_sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
+	int i, unmap_idx = -1;
 
-	blk_start = iova & ~(blk_size - 1);
-	blk_end = blk_start + blk_size;
-	blk_paddr = iopte_to_pfn(*ptep, data) << data->pg_shift;
+	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
+		return 0;
 
-	for (; blk_start < blk_end; blk_start += size, blk_paddr += size) {
-		arm_lpae_iopte *tablep;
+	tablep = __arm_lpae_alloc_pages(tablesz, GFP_ATOMIC, cfg);
+	if (!tablep)
+		return 0; /* Bytes unmapped */
 
+	if (size == split_sz)
+		unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
+
+	blk_paddr = iopte_to_pfn(blk_pte, data) << data->pg_shift;
+	pte = iopte_prot(blk_pte);
+
+	for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
 		/* Unmap! */
-		if (blk_start == iova)
+		if (i == unmap_idx)
 			continue;
 
-		/* __arm_lpae_map expects a pointer to the start of the table */
-		tablep = &table - ARM_LPAE_LVL_IDX(blk_start, lvl, data);
-		if (__arm_lpae_map(data, blk_start, blk_paddr, size, prot, lvl,
-				   tablep) < 0) {
-			if (table) {
-				/* Free the table we allocated */
-				tablep = iopte_deref(table, data);
-				__arm_lpae_free_pgtable(data, lvl + 1, tablep);
-			}
-			return 0; /* Bytes unmapped */
-		}
+		__arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]);
 	}
 
-	__arm_lpae_set_pte(ptep, table, &data->iop.cfg);
-	iova &= ~(blk_size - 1);
-	io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true);
+	pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg);
+	if (pte != blk_pte) {
+		__arm_lpae_free_pages(tablep, tablesz, cfg);
+		/*
+		 * We may race against someone unmapping another part of this
+		 * block, but anything else is invalid. We can't misinterpret
+		 * a page entry here since we're never at the last level.
+		 */
+		if (iopte_type(pte, lvl - 1) != ARM_LPAE_PTE_TYPE_TABLE)
+			return 0;
+
+		tablep = iopte_deref(pte, data);
+	}
+
+	if (pte != blk_pte || unmap_idx < 0)
+		return __arm_lpae_unmap(data, iova, size, lvl, tablep);
+
+	io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
 	return size;
 }
 
@@ -495,19 +562,18 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 {
 	arm_lpae_iopte pte;
 	struct io_pgtable *iop = &data->iop;
-	size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
 
 	/* Something went horribly wrong and we ran out of page table */
 	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
 		return 0;
 
 	ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
-	pte = *ptep;
+	pte = READ_ONCE(*ptep);
 	if (WARN_ON(!pte))
 		return 0;
 
 	/* If the size matches this level, we're in the right place */
-	if (size == blk_size) {
+	if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
 		__arm_lpae_set_pte(ptep, 0, &iop->cfg);
 
 		if (!iopte_leaf(pte, lvl)) {
@@ -527,9 +593,8 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 		 * Insert a table at the next level to map the old region,
 		 * minus the part we want to unmap
 		 */
-		return arm_lpae_split_blk_unmap(data, iova, size,
-						iopte_prot(pte), lvl, ptep,
-						blk_size);
+		return arm_lpae_split_blk_unmap(data, iova, size, pte,
+						lvl + 1, ptep);
 	}
 
 	/* Keep on walkin' */
@@ -565,7 +630,8 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
 			return 0;
 
 		/* Grab the IOPTE we're interested in */
-		pte = *(ptep + ARM_LPAE_LVL_IDX(iova, lvl, data));
+		ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
+		pte = READ_ONCE(*ptep);
 
 		/* Valid entry? */
 		if (!pte)
@@ -673,7 +739,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 	u64 reg;
 	struct arm_lpae_io_pgtable *data;
 
-	if (cfg->quirks & ~IO_PGTABLE_QUIRK_ARM_NS)
+	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA))
 		return NULL;
 
 	data = arm_lpae_alloc_pgtable(cfg);
@@ -762,7 +828,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
 	struct arm_lpae_io_pgtable *data;
 
 	/* The NS quirk doesn't apply at stage 2 */
-	if (cfg->quirks)
+	if (cfg->quirks & ~IO_PGTABLE_QUIRK_NO_DMA)
 		return NULL;
 
 	data = arm_lpae_alloc_pgtable(cfg);
@@ -1066,6 +1132,7 @@ static int __init arm_lpae_do_selftests(void)
 	struct io_pgtable_cfg cfg = {
 		.tlb = &dummy_tlb_ops,
 		.oas = 48,
+		.quirks = IO_PGTABLE_QUIRK_NO_DMA,
 	};
 
 	for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {

diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index 969d82c..524263a 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h

@@ -65,11 +65,17 @@ struct io_pgtable_cfg {
 	 *	PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit
 	 *	when the SoC is in "4GB mode" and they can only access the high
 	 *	remap of DRAM (0x1_00000000 to 0x1_ffffffff).
+	 *
+	 * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever
+	 *	be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a
+	 *	software-emulated IOMMU), such that pagetable updates need not
+	 *	be treated as explicit DMA data.
 	 */
 	#define IO_PGTABLE_QUIRK_ARM_NS		BIT(0)
 	#define IO_PGTABLE_QUIRK_NO_PERMS	BIT(1)
 	#define IO_PGTABLE_QUIRK_TLBI_ON_MAP	BIT(2)
 	#define IO_PGTABLE_QUIRK_ARM_MTK_4GB	BIT(3)
+	#define IO_PGTABLE_QUIRK_NO_DMA		BIT(4)
 	unsigned long			quirks;
 	unsigned long			pgsize_bitmap;
 	unsigned int			ias;

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index cf7ca7e..3f6ea16 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c

@@ -915,13 +915,7 @@ static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
  */
 struct iommu_group *generic_device_group(struct device *dev)
 {
-	struct iommu_group *group;
-
-	group = iommu_group_alloc();
-	if (IS_ERR(group))
-		return NULL;
-
-	return group;
+	return iommu_group_alloc();
 }
 
 /*
@@ -988,11 +982,7 @@ struct iommu_group *pci_device_group(struct device *dev)
 		return group;
 
 	/* No shared group found, allocate new */
-	group = iommu_group_alloc();
-	if (IS_ERR(group))
-		return NULL;
-
-	return group;
+	return iommu_group_alloc();
 }
 
 /**
@@ -1020,6 +1010,9 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
 	if (ops && ops->device_group)
 		group = ops->device_group(dev);
 
+	if (WARN_ON_ONCE(group == NULL))
+		return ERR_PTR(-EINVAL);
+
 	if (IS_ERR(group))
 		return group;
 

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 5c88ba7..246f14c 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c

@@ -22,6 +22,7 @@
 #include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/bitops.h>
+#include <linux/cpu.h>
 
 static bool iova_rcache_insert(struct iova_domain *iovad,
 			       unsigned long pfn,
@@ -48,7 +49,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 	iovad->cached32_node = NULL;
 	iovad->granule = granule;
 	iovad->start_pfn = start_pfn;
-	iovad->dma_32bit_pfn = pfn_32bit;
+	iovad->dma_32bit_pfn = pfn_32bit + 1;
 	init_iova_rcaches(iovad);
 }
 EXPORT_SYMBOL_GPL(init_iova_domain);
@@ -63,7 +64,7 @@ __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
 		struct rb_node *prev_node = rb_prev(iovad->cached32_node);
 		struct iova *curr_iova =
 			rb_entry(iovad->cached32_node, struct iova, node);
-		*limit_pfn = curr_iova->pfn_lo - 1;
+		*limit_pfn = curr_iova->pfn_lo;
 		return prev_node;
 	}
 }
@@ -135,7 +136,7 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova,
 static unsigned int
 iova_get_pad_size(unsigned int size, unsigned int limit_pfn)
 {
-	return (limit_pfn + 1 - size) & (__roundup_pow_of_two(size) - 1);
+	return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1);
 }
 
 static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
@@ -155,18 +156,15 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 	while (curr) {
 		struct iova *curr_iova = rb_entry(curr, struct iova, node);
 
-		if (limit_pfn < curr_iova->pfn_lo)
+		if (limit_pfn <= curr_iova->pfn_lo) {
 			goto move_left;
-		else if (limit_pfn < curr_iova->pfn_hi)
-			goto adjust_limit_pfn;
-		else {
+		} else if (limit_pfn > curr_iova->pfn_hi) {
 			if (size_aligned)
 				pad_size = iova_get_pad_size(size, limit_pfn);
-			if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn)
+			if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn)
 				break;	/* found a free slot */
 		}
-adjust_limit_pfn:
-		limit_pfn = curr_iova->pfn_lo ? (curr_iova->pfn_lo - 1) : 0;
+		limit_pfn = curr_iova->pfn_lo;
 move_left:
 		prev = curr;
 		curr = rb_prev(curr);
@@ -182,7 +180,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 	}
 
 	/* pfn_lo will point to size aligned address if size_aligned is set */
-	new->pfn_lo = limit_pfn - (size + pad_size) + 1;
+	new->pfn_lo = limit_pfn - (size + pad_size);
 	new->pfn_hi = new->pfn_lo + size - 1;
 
 	/* If we have 'prev', it's a valid place to start the insertion. */
@@ -269,7 +267,7 @@ alloc_iova(struct iova_domain *iovad, unsigned long size,
 	if (!new_iova)
 		return NULL;
 
-	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn,
+	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
 			new_iova, size_aligned);
 
 	if (ret) {
@@ -398,10 +396,8 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
 
 		/* Try replenishing IOVAs by flushing rcache. */
 		flushed_rcache = true;
-		preempt_disable();
 		for_each_online_cpu(cpu)
 			free_cpu_cached_iovas(cpu, iovad);
-		preempt_enable();
 		goto retry;
 	}
 
@@ -729,7 +725,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
 	bool can_insert = false;
 	unsigned long flags;
 
-	cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches);
+	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
 	spin_lock_irqsave(&cpu_rcache->lock, flags);
 
 	if (!iova_magazine_full(cpu_rcache->loaded)) {
@@ -759,7 +755,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
 		iova_magazine_push(cpu_rcache->loaded, iova_pfn);
 
 	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
-	put_cpu_ptr(rcache->cpu_rcaches);
 
 	if (mag_to_free) {
 		iova_magazine_free_pfns(mag_to_free, iovad);
@@ -793,7 +788,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
 	bool has_pfn = false;
 	unsigned long flags;
 
-	cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches);
+	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
 	spin_lock_irqsave(&cpu_rcache->lock, flags);
 
 	if (!iova_magazine_empty(cpu_rcache->loaded)) {
@@ -815,7 +810,6 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
 		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
 
 	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
-	put_cpu_ptr(rcache->cpu_rcaches);
 
 	return iova_pfn;
 }

diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index b7e14ee..2a38aa1 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c

@@ -8,7 +8,9 @@
  * the Free Software Foundation; version 2 of the License.
  */
 
+#include <linux/bitmap.h>
 #include <linux/delay.h>
+#include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/export.h>
@@ -21,17 +23,24 @@
 #include <linux/sizes.h>
 #include <linux/slab.h>
 
+#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
 #include <asm/dma-iommu.h>
 #include <asm/pgalloc.h>
+#endif
 
 #include "io-pgtable.h"
 
+#define IPMMU_CTX_MAX 1
+
 struct ipmmu_vmsa_device {
 	struct device *dev;
 	void __iomem *base;
 	struct list_head list;
 
 	unsigned int num_utlbs;
+	spinlock_t lock;			/* Protects ctx and domains[] */
+	DECLARE_BITMAP(ctx, IPMMU_CTX_MAX);
+	struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX];
 
 	struct dma_iommu_mapping *mapping;
 };
@@ -47,10 +56,12 @@ struct ipmmu_vmsa_domain {
 	spinlock_t lock;			/* Protects mappings */
 };
 
-struct ipmmu_vmsa_archdata {
+struct ipmmu_vmsa_iommu_priv {
 	struct ipmmu_vmsa_device *mmu;
 	unsigned int *utlbs;
 	unsigned int num_utlbs;
+	struct device *dev;
+	struct list_head list;
 };
 
 static DEFINE_SPINLOCK(ipmmu_devices_lock);
@@ -61,6 +72,24 @@ static struct ipmmu_vmsa_domain *to_vmsa_domain(struct iommu_domain *dom)
 	return container_of(dom, struct ipmmu_vmsa_domain, io_domain);
 }
 
+
+static struct ipmmu_vmsa_iommu_priv *to_priv(struct device *dev)
+{
+#if defined(CONFIG_ARM)
+	return dev->archdata.iommu;
+#else
+	return dev->iommu_fwspec->iommu_priv;
+#endif
+}
+static void set_priv(struct device *dev, struct ipmmu_vmsa_iommu_priv *p)
+{
+#if defined(CONFIG_ARM)
+	dev->archdata.iommu = p;
+#else
+	dev->iommu_fwspec->iommu_priv = p;
+#endif
+}
+
 #define TLB_LOOP_TIMEOUT		100	/* 100us */
 
 /* -----------------------------------------------------------------------------
@@ -293,9 +322,29 @@ static struct iommu_gather_ops ipmmu_gather_ops = {
  * Domain/Context Management
  */
 
+static int ipmmu_domain_allocate_context(struct ipmmu_vmsa_device *mmu,
+					 struct ipmmu_vmsa_domain *domain)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&mmu->lock, flags);
+
+	ret = find_first_zero_bit(mmu->ctx, IPMMU_CTX_MAX);
+	if (ret != IPMMU_CTX_MAX) {
+		mmu->domains[ret] = domain;
+		set_bit(ret, mmu->ctx);
+	}
+
+	spin_unlock_irqrestore(&mmu->lock, flags);
+
+	return ret;
+}
+
 static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
 {
 	u64 ttbr;
+	int ret;
 
 	/*
 	 * Allocate the page table operations.
@@ -309,7 +358,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
 	 * non-secure mode.
 	 */
 	domain->cfg.quirks = IO_PGTABLE_QUIRK_ARM_NS;
-	domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
+	domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K;
 	domain->cfg.ias = 32;
 	domain->cfg.oas = 40;
 	domain->cfg.tlb = &ipmmu_gather_ops;
@@ -327,10 +376,15 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
 		return -EINVAL;
 
 	/*
-	 * TODO: When adding support for multiple contexts, find an unused
-	 * context.
+	 * Find an unused context.
 	 */
-	domain->context_id = 0;
+	ret = ipmmu_domain_allocate_context(domain->mmu, domain);
+	if (ret == IPMMU_CTX_MAX) {
+		free_io_pgtable_ops(domain->iop);
+		return -EBUSY;
+	}
+
+	domain->context_id = ret;
 
 	/* TTBR0 */
 	ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0];
@@ -372,6 +426,19 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
 	return 0;
 }
 
+/* Give context @context_id back to @mmu so that it can be allocated again. */
+static void ipmmu_domain_free_context(struct ipmmu_vmsa_device *mmu,
+				      unsigned int context_id)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&mmu->lock, irqflags);
+	/* Free the slot and forget its owner while holding mmu->lock. */
+	clear_bit(context_id, mmu->ctx);
+	mmu->domains[context_id] = NULL;
+	spin_unlock_irqrestore(&mmu->lock, irqflags);
+}
+
 static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain)
 {
 	/*
@@ -382,6 +449,7 @@ static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain)
 	 */
 	ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH);
 	ipmmu_tlb_sync(domain);
+	ipmmu_domain_free_context(domain->mmu, domain->context_id);
 }
 
 /* -----------------------------------------------------------------------------
@@ -439,29 +507,35 @@ static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain)
 static irqreturn_t ipmmu_irq(int irq, void *dev)
 {
 	struct ipmmu_vmsa_device *mmu = dev;
-	struct iommu_domain *io_domain;
-	struct ipmmu_vmsa_domain *domain;
+	irqreturn_t status = IRQ_NONE;
+	unsigned int i;
+	unsigned long flags;
 
-	if (!mmu->mapping)
-		return IRQ_NONE;
+	spin_lock_irqsave(&mmu->lock, flags);
 
-	io_domain = mmu->mapping->domain;
-	domain = to_vmsa_domain(io_domain);
+	/*
+	 * Check interrupts for all active contexts.
+	 */
+	for (i = 0; i < IPMMU_CTX_MAX; i++) {
+		if (!mmu->domains[i])
+			continue;
+		if (ipmmu_domain_irq(mmu->domains[i]) == IRQ_HANDLED)
+			status = IRQ_HANDLED;
+	}
 
-	return ipmmu_domain_irq(domain);
+	spin_unlock_irqrestore(&mmu->lock, flags);
+
+	return status;
 }
 
 /* -----------------------------------------------------------------------------
  * IOMMU Operations
  */
 
-static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
+static struct iommu_domain *__ipmmu_domain_alloc(unsigned type)
 {
 	struct ipmmu_vmsa_domain *domain;
 
-	if (type != IOMMU_DOMAIN_UNMANAGED)
-		return NULL;
-
 	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
 	if (!domain)
 		return NULL;
@@ -487,8 +561,8 @@ static void ipmmu_domain_free(struct iommu_domain *io_domain)
 static int ipmmu_attach_device(struct iommu_domain *io_domain,
 			       struct device *dev)
 {
-	struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu;
-	struct ipmmu_vmsa_device *mmu = archdata->mmu;
+	struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
+	struct ipmmu_vmsa_device *mmu = priv->mmu;
 	struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
 	unsigned long flags;
 	unsigned int i;
@@ -513,15 +587,16 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
 		dev_err(dev, "Can't attach IPMMU %s to domain on IPMMU %s\n",
 			dev_name(mmu->dev), dev_name(domain->mmu->dev));
 		ret = -EINVAL;
-	}
+	} else
+		dev_info(dev, "Reusing IPMMU context %u\n", domain->context_id);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 
 	if (ret < 0)
 		return ret;
 
-	for (i = 0; i < archdata->num_utlbs; ++i)
-		ipmmu_utlb_enable(domain, archdata->utlbs[i]);
+	for (i = 0; i < priv->num_utlbs; ++i)
+		ipmmu_utlb_enable(domain, priv->utlbs[i]);
 
 	return 0;
 }
@@ -529,12 +604,12 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
 static void ipmmu_detach_device(struct iommu_domain *io_domain,
 				struct device *dev)
 {
-	struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu;
+	struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
 	struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
 	unsigned int i;
 
-	for (i = 0; i < archdata->num_utlbs; ++i)
-		ipmmu_utlb_disable(domain, archdata->utlbs[i]);
+	for (i = 0; i < priv->num_utlbs; ++i)
+		ipmmu_utlb_disable(domain, priv->utlbs[i]);
 
 	/*
 	 * TODO: Optimize by disabling the context when no device is attached.
@@ -595,22 +670,15 @@ static int ipmmu_find_utlbs(struct ipmmu_vmsa_device *mmu, struct device *dev,
 	return 0;
 }
 
-static int ipmmu_add_device(struct device *dev)
+static int ipmmu_init_platform_device(struct device *dev)
 {
-	struct ipmmu_vmsa_archdata *archdata;
+	struct ipmmu_vmsa_iommu_priv *priv;
 	struct ipmmu_vmsa_device *mmu;
-	struct iommu_group *group = NULL;
 	unsigned int *utlbs;
 	unsigned int i;
 	int num_utlbs;
 	int ret = -ENODEV;
 
-	if (dev->archdata.iommu) {
-		dev_warn(dev, "IOMMU driver already assigned to device %s\n",
-			 dev_name(dev));
-		return -EINVAL;
-	}
-
 	/* Find the master corresponding to the device. */
 
 	num_utlbs = of_count_phandle_with_args(dev->of_node, "iommus",
@@ -647,6 +715,46 @@ static int ipmmu_add_device(struct device *dev)
 		}
 	}
 
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	priv->mmu = mmu;
+	priv->utlbs = utlbs;
+	priv->num_utlbs = num_utlbs;
+	priv->dev = dev;
+	set_priv(dev, priv);
+	return 0;
+
+error:
+	kfree(utlbs);
+	return ret;
+}
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
+
+/* Domain allocation for the CONFIG_ARM && !CONFIG_IOMMU_DMA build. */
+static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
+{
+	/* Only unmanaged domains are supported here; refuse anything else. */
+	return type == IOMMU_DOMAIN_UNMANAGED ?
+		__ipmmu_domain_alloc(type) : NULL;
+}
+
+static int ipmmu_add_device(struct device *dev)
+{
+	struct ipmmu_vmsa_device *mmu = NULL;
+	struct iommu_group *group;
+	int ret;
+
+	if (to_priv(dev)) {
+		dev_warn(dev, "IOMMU driver already assigned to device %s\n",
+			 dev_name(dev));
+		return -EINVAL;
+	}
+
 	/* Create a device group and add the device to it. */
 	group = iommu_group_alloc();
 	if (IS_ERR(group)) {
@@ -664,16 +772,9 @@ static int ipmmu_add_device(struct device *dev)
 		goto error;
 	}
 
-	archdata = kzalloc(sizeof(*archdata), GFP_KERNEL);
-	if (!archdata) {
-		ret = -ENOMEM;
+	ret = ipmmu_init_platform_device(dev);
+	if (ret < 0)
 		goto error;
-	}
-
-	archdata->mmu = mmu;
-	archdata->utlbs = utlbs;
-	archdata->num_utlbs = num_utlbs;
-	dev->archdata.iommu = archdata;
 
 	/*
 	 * Create the ARM mapping, used by the ARM DMA mapping core to allocate
@@ -684,6 +785,7 @@ static int ipmmu_add_device(struct device *dev)
 	 * - Make the mapping size configurable ? We currently use a 2GB mapping
 	 *   at a 1GB offset to ensure that NULL VAs will fault.
 	 */
+	mmu = to_priv(dev)->mmu;
 	if (!mmu->mapping) {
 		struct dma_iommu_mapping *mapping;
 
@@ -708,30 +810,30 @@ static int ipmmu_add_device(struct device *dev)
 	return 0;
 
 error:
-	arm_iommu_release_mapping(mmu->mapping);
-
-	kfree(dev->archdata.iommu);
-	kfree(utlbs);
-
-	dev->archdata.iommu = NULL;
+	if (mmu)
+		arm_iommu_release_mapping(mmu->mapping);
 
 	if (!IS_ERR_OR_NULL(group))
 		iommu_group_remove_device(dev);
 
+	kfree(to_priv(dev)->utlbs);
+	kfree(to_priv(dev));
+	set_priv(dev, NULL);
+
 	return ret;
 }
 
 static void ipmmu_remove_device(struct device *dev)
 {
-	struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu;
+	struct ipmmu_vmsa_iommu_priv *priv = to_priv(dev);
 
 	arm_iommu_detach_device(dev);
 	iommu_group_remove_device(dev);
 
-	kfree(archdata->utlbs);
-	kfree(archdata);
+	kfree(priv->utlbs);
+	kfree(priv);
 
-	dev->archdata.iommu = NULL;
+	set_priv(dev, NULL);
 }
 
 static const struct iommu_ops ipmmu_ops = {
@@ -748,6 +850,144 @@ static const struct iommu_ops ipmmu_ops = {
 	.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
 };
 
+#endif /* CONFIG_ARM && !CONFIG_IOMMU_DMA */
+
+#ifdef CONFIG_IOMMU_DMA
+
+static DEFINE_SPINLOCK(ipmmu_slave_devices_lock);
+static LIST_HEAD(ipmmu_slave_devices);
+
+/* Allocate an UNMANAGED or DMA domain; every other type yields NULL. */
+static struct iommu_domain *ipmmu_domain_alloc_dma(unsigned type)
+{
+	struct iommu_domain *io_domain;
+
+	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
+		return NULL;
+
+	io_domain = __ipmmu_domain_alloc(type);
+	if (!io_domain || type != IOMMU_DOMAIN_DMA)
+		return io_domain;
+	/* Cookie setup failed: free the domain (assumed a bare kzalloc so far). */
+	if (iommu_get_dma_cookie(io_domain)) {
+		kfree(to_vmsa_domain(io_domain));
+		return NULL;
+	}
+	return io_domain;
+}
+
+/*
+ * Free a domain: release the DMA cookie of IOMMU_DOMAIN_DMA domains first,
+ * then hand the domain to ipmmu_domain_free() like any other type.
+ */
+static void ipmmu_domain_free_dma(struct iommu_domain *io_domain)
+{
+	if (io_domain->type == IOMMU_DOMAIN_DMA)
+		iommu_put_dma_cookie(io_domain);
+
+	ipmmu_domain_free(io_domain);
+}
+
+/* Put @dev into an IOMMU group and track it on the slave device list. */
+static int ipmmu_add_device_dma(struct device *dev)
+{
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+	struct iommu_group *group;
+
+	/* Only devices verified in xlate() pass; fwspec itself may be NULL. */
+	if (!fwspec || !fwspec->iommu_priv)
+		return -ENODEV;
+
+	group = iommu_group_get_for_dev(dev);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+	/* iommu_group_get_for_dev() returns a referenced group: release it. */
+	iommu_group_put(group);
+
+	spin_lock(&ipmmu_slave_devices_lock);
+	list_add(&to_priv(dev)->list, &ipmmu_slave_devices);
+	spin_unlock(&ipmmu_slave_devices_lock);
+	return 0;
+}
+
+/* Untrack @dev as a slave device, then remove it from its IOMMU group. */
+static void ipmmu_remove_device_dma(struct device *dev)
+{
+	struct list_head *node = &to_priv(dev)->list;
+
+	spin_lock(&ipmmu_slave_devices_lock);
+	list_del(node);
+	spin_unlock(&ipmmu_slave_devices_lock);
+	iommu_group_remove_device(dev);
+}
+
+/* Return another tracked slave device behind the same IPMMU, or NULL. */
+static struct device *ipmmu_find_sibling_device(struct device *dev)
+{
+	struct ipmmu_vmsa_iommu_priv *self = to_priv(dev);
+	struct ipmmu_vmsa_iommu_priv *pos;
+	struct device *sibling = NULL;
+
+	spin_lock(&ipmmu_slave_devices_lock);
+
+	list_for_each_entry(pos, &ipmmu_slave_devices, list) {
+		/* Skip ourselves; the first other entry on our IPMMU wins. */
+		if (pos != self && pos->mmu == self->mmu) {
+			sibling = pos->dev;
+			break;
+		}
+	}
+
+	spin_unlock(&ipmmu_slave_devices_lock);
+
+	return sibling;
+}
+
+/* Reuse the group of a sibling behind the same IPMMU, else get a new one. */
+static struct iommu_group *ipmmu_find_group_dma(struct device *dev)
+{
+	struct device *sibling = ipmmu_find_sibling_device(dev);
+	struct iommu_group *group = NULL;
+
+	if (sibling)
+		group = iommu_group_get(sibling);
+	/* iommu_group_get() signals "no group" with NULL, never ERR_PTR(). */
+	if (IS_ERR_OR_NULL(group))
+		group = generic_device_group(dev);
+	return group;
+}
+
+/* of_xlate: set up per-device data unless the referenced node is disabled. */
+static int ipmmu_of_xlate_dma(struct device *dev,
+			      struct of_phandle_args *spec)
+{
+	/*
+	 * A disabled IPMMU node must yield an error so that the of_iommu
+	 * code does not install ops for an IOMMU that is not in use.
+	 */
+	if (of_device_is_available(spec->np))
+		return ipmmu_init_platform_device(dev);
+	return -ENODEV;
+}
+
+static const struct iommu_ops ipmmu_ops = {	/* CONFIG_IOMMU_DMA variant */
+	.domain_alloc = ipmmu_domain_alloc_dma,	/* UNMANAGED and DMA types */
+	.domain_free = ipmmu_domain_free_dma,
+	.attach_dev = ipmmu_attach_device,
+	.detach_dev = ipmmu_detach_device,
+	.map = ipmmu_map,
+	.unmap = ipmmu_unmap,
+	.map_sg = default_iommu_map_sg,
+	.iova_to_phys = ipmmu_iova_to_phys,
+	.add_device = ipmmu_add_device_dma,
+	.remove_device = ipmmu_remove_device_dma,
+	.device_group = ipmmu_find_group_dma,	/* one group per IPMMU */
+	.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
+	.of_xlate = ipmmu_of_xlate_dma,
+};
+
+#endif /* CONFIG_IOMMU_DMA */
+
 /* -----------------------------------------------------------------------------
  * Probe/remove and init
  */
@@ -768,11 +1008,6 @@ static int ipmmu_probe(struct platform_device *pdev)
 	int irq;
 	int ret;
 
-	if (!IS_ENABLED(CONFIG_OF) && !pdev->dev.platform_data) {
-		dev_err(&pdev->dev, "missing platform data\n");
-		return -EINVAL;
-	}
-
 	mmu = devm_kzalloc(&pdev->dev, sizeof(*mmu), GFP_KERNEL);
 	if (!mmu) {
 		dev_err(&pdev->dev, "cannot allocate device data\n");
@@ -781,6 +1016,8 @@ static int ipmmu_probe(struct platform_device *pdev)
 
 	mmu->dev = &pdev->dev;
 	mmu->num_utlbs = 32;
+	spin_lock_init(&mmu->lock);
+	bitmap_zero(mmu->ctx, IPMMU_CTX_MAX);
 
 	/* Map I/O memory and request IRQ. */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -840,7 +1077,9 @@ static int ipmmu_remove(struct platform_device *pdev)
 	list_del(&mmu->list);
 	spin_unlock(&ipmmu_devices_lock);
 
+#if defined(CONFIG_ARM) && !defined(CONFIG_IOMMU_DMA)
 	arm_iommu_release_mapping(mmu->mapping);
+#endif
 
 	ipmmu_device_reset(mmu);
 

diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 95dfca3..641e035 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c

@@ -1309,7 +1309,7 @@ static void omap_iommu_remove_device(struct device *dev)
 static struct iommu_group *omap_iommu_device_group(struct device *dev)
 {
 	struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
-	struct iommu_group *group = NULL;
+	struct iommu_group *group = ERR_PTR(-EINVAL);
 
 	if (arch_data->iommu_dev)
 		group = arch_data->iommu_dev->group;

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 179e636..8788640 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c

@@ -165,20 +165,14 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
 
 static int s390_iommu_add_device(struct device *dev)
 {
-	struct iommu_group *group;
-	int rc;
+	struct iommu_group *group = iommu_group_get_for_dev(dev);
 
-	group = iommu_group_get(dev);
-	if (!group) {
-		group = iommu_group_alloc();
-		if (IS_ERR(group))
-			return PTR_ERR(group);
-	}
+	if (IS_ERR(group))
+		return PTR_ERR(group);
 
-	rc = iommu_group_add_device(group, dev);
 	iommu_group_put(group);
 
-	return rc;
+	return 0;
 }
 
 static void s390_iommu_remove_device(struct device *dev)
@@ -344,6 +338,7 @@ static struct iommu_ops s390_iommu_ops = {
 	.iova_to_phys = s390_iommu_iova_to_phys,
 	.add_device = s390_iommu_add_device,
 	.remove_device = s390_iommu_remove_device,
+	.device_group = generic_device_group,
 	.pgsize_bitmap = S390_IOMMU_PGSIZES,
 };
 

diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 11fe0c5..8150164 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c

@@ -1670,13 +1670,10 @@ void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
 	queue_work(wq, &line_ws->ws);
 }
 
-void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
-		  unsigned long *lun_bitmap)
+static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list,
+			     int nr_ppas, int pos)
 {
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
-	struct pblk_lun *rlun;
-	int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+	struct pblk_lun *rlun = &pblk->luns[pos];
 	int ret;
 
 	/*
@@ -1690,14 +1687,8 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 		WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
 				ppa_list[0].g.ch != ppa_list[i].g.ch);
 #endif
-	/* If the LUN has been locked for this same request, do no attempt to
-	 * lock it again
-	 */
-	if (test_and_set_bit(pos, lun_bitmap))
-		return;
 
-	rlun = &pblk->luns[pos];
-	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
+	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000));
 	if (ret) {
 		switch (ret) {
 		case -ETIME:
@@ -1710,6 +1701,50 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 	}
 }
 
+/* Down the write semaphore of the LUN addressed by ppa_list[0]. */
+void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
+{
+	struct nvm_geo *geo = &pblk->dev->geo;
+	int lun_pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+	__pblk_down_page(pblk, ppa_list, nr_ppas, lun_pos);
+}
+
+/* Down the LUN semaphore for a request, at most once per lun_bitmap bit. */
+void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
+		  unsigned long *lun_bitmap)
+{
+	struct nvm_tgt_dev *tgt = pblk->dev;
+	struct nvm_geo *geo = &tgt->geo;
+	int lun_pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+	/*
+	 * If the LUN has already been locked for this same request, do not
+	 * attempt to lock it again.
+	 */
+	if (!test_and_set_bit(lun_pos, lun_bitmap))
+		__pblk_down_page(pblk, ppa_list, nr_ppas, lun_pos);
+}
+
+/* Up the write semaphore of the LUN addressed by ppa_list[0]. */
+void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas)
+{
+	struct nvm_tgt_dev *tgt = pblk->dev;
+	struct nvm_geo *geo = &tgt->geo;
+	int lun_pos = pblk_ppa_to_pos(geo, ppa_list[0]);
+
+#ifdef CONFIG_NVM_DEBUG
+	int idx;
+
+	/* Debug builds warn unless all ppas share one channel and LUN. */
+	for (idx = 1; idx < nr_ppas; idx++)
+		WARN_ON(ppa_list[idx].g.lun != ppa_list[0].g.lun ||
+				ppa_list[idx].g.ch != ppa_list[0].g.ch);
+#endif
+
+	up(&pblk->luns[lun_pos].wr_sem);
+}
+
 void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 		unsigned long *lun_bitmap)
 {

diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 0e48d3e..cb556e0 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c

@@ -340,9 +340,14 @@ static void pblk_end_io_recov(struct nvm_rq *rqd)
 	struct pblk *pblk = pad_rq->pblk;
 	struct nvm_tgt_dev *dev = pblk->dev;
 
-	kref_put(&pad_rq->ref, pblk_recov_complete);
+	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+
+	bio_put(rqd->bio);
 	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 	pblk_free_rqd(pblk, rqd, WRITE);
+
+	atomic_dec(&pblk->inflight_io);
+	kref_put(&pad_rq->ref, pblk_recov_complete);
 }
 
 static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
@@ -385,7 +390,7 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
 	if (rq_ppas < pblk->min_write_pgs) {
 		pr_err("pblk: corrupted pad line %d\n", line->id);
-		goto free_rq;
+		goto fail_free_pad;
 	}
 
 	rq_len = rq_ppas * geo->sec_size;
@@ -393,7 +398,7 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
 	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
 	if (!meta_list) {
 		ret = -ENOMEM;
-		goto free_data;
+		goto fail_free_pad;
 	}
 
 	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
@@ -404,9 +409,9 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
 		ret = PTR_ERR(rqd);
 		goto fail_free_meta;
 	}
-	memset(rqd, 0, pblk_w_rq_size);
 
-	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
+	bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
+						PBLK_VMALLOC_META, GFP_KERNEL);
 	if (IS_ERR(bio)) {
 		ret = PTR_ERR(bio);
 		goto fail_free_rqd;
@@ -453,15 +458,15 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
 	}
 
 	kref_get(&pad_rq->ref);
+	pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
 
 	ret = pblk_submit_io(pblk, rqd);
 	if (ret) {
 		pr_err("pblk: I/O submission failed: %d\n", ret);
-		goto free_data;
+		pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+		goto fail_free_bio;
 	}
 
-	atomic_dec(&pblk->inflight_io);
-
 	left_line_ppas -= rq_ppas;
 	left_ppas -= rq_ppas;
 	if (left_ppas && left_line_ppas)
@@ -475,17 +480,23 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
 		ret = -ETIME;
 	}
 
+	if (!pblk_line_is_full(line))
+		pr_err("pblk: corrupted padded line: %d\n", line->id);
+
+	vfree(data);
 free_rq:
 	kfree(pad_rq);
-free_data:
-	vfree(data);
 	return ret;
 
+fail_free_bio:
+	bio_put(bio);
 fail_free_rqd:
 	pblk_free_rqd(pblk, rqd, WRITE);
 fail_free_meta:
 	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+fail_free_pad:
 	kfree(pad_rq);
+	vfree(data);
 	return ret;
 }
 

diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index d62a8f4..3ad9e56 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c

@@ -39,9 +39,7 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
 
 	ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
 
-	if (rqd->meta_list)
-		nvm_dev_dma_free(dev->parent, rqd->meta_list,
-							rqd->dma_meta_list);
+	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 
 	bio_put(rqd->bio);
 	pblk_free_rqd(pblk, rqd, WRITE);
@@ -178,15 +176,12 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)
 {
 	struct pblk *pblk = rqd->private;
 	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
 	struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
 	struct pblk_line *line = m_ctx->private;
 	struct pblk_emeta *emeta = line->emeta;
-	int pos = pblk_ppa_to_pos(geo, rqd->ppa_list[0]);
-	struct pblk_lun *rlun = &pblk->luns[pos];
 	int sync;
 
-	up(&rlun->wr_sem);
+	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
 
 	if (rqd->error) {
 		pblk_log_write_err(pblk, rqd);
@@ -203,6 +198,7 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)
 								pblk->close_wq);
 
 	bio_put(rqd->bio);
+	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 	pblk_free_rqd(pblk, rqd, READ);
 
 	atomic_dec(&pblk->inflight_io);
@@ -226,9 +222,6 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
 	if (!rqd->meta_list)
 		return -ENOMEM;
 
-	if (unlikely(nr_secs == 1))
-		return 0;
-
 	rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
 	rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
 
@@ -367,7 +360,6 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_emeta *emeta = meta_line->emeta;
 	struct pblk_g_ctx *m_ctx;
-	struct pblk_lun *rlun;
 	struct bio *bio;
 	struct nvm_rq *rqd;
 	void *data;
@@ -411,13 +403,6 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 			rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
 	}
 
-	rlun = &pblk->luns[pblk_ppa_to_pos(geo, rqd->ppa_list[0])];
-	ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
-	if (ret) {
-		pr_err("pblk: lun semaphore timed out (%d)\n", ret);
-		goto fail_free_bio;
-	}
-
 	emeta->mem += rq_len;
 	if (emeta->mem >= lm->emeta_len[0]) {
 		spin_lock(&l_mg->close_lock);
@@ -427,6 +412,8 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 		spin_unlock(&l_mg->close_lock);
 	}
 
+	pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+
 	ret = pblk_submit_io(pblk, rqd);
 	if (ret) {
 		pr_err("pblk: emeta I/O submission failed: %d\n", ret);
@@ -436,10 +423,13 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 	return NVM_IO_OK;
 
 fail_rollback:
+	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
 	spin_lock(&l_mg->close_lock);
 	pblk_dealloc_page(pblk, meta_line, rq_ppas);
 	list_add(&meta_line->list, &meta_line->list);
 	spin_unlock(&l_mg->close_lock);
+
+	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
 fail_free_bio:
 	if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META))
 		bio_put(bio);

diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 1593138..0c5692c 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h

@@ -739,8 +739,10 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
 		   unsigned long secs_to_flush);
+void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas);
 void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 		  unsigned long *lun_bitmap);
+void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas);
 void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 		unsigned long *lun_bitmap);
 void pblk_end_bio_sync(struct bio *bio);

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 10cabe9..2edbcc2 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c

@@ -1279,7 +1279,7 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
 	clone->bi_iter.bi_size = to_bytes(len);
 
 	if (unlikely(bio_integrity(bio) != NULL))
-		bio_integrity_trim(clone, 0, len);
+		bio_integrity_trim(clone);
 
 	return 0;
 }

diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
index dc6ce90..b0ca5a4c 100644
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c

@@ -54,12 +54,19 @@ static const struct mfd_cell ec_pd_cell = {
 static irqreturn_t ec_irq_thread(int irq, void *data)
 {
 	struct cros_ec_device *ec_dev = data;
+	bool wake_event = true;
 	int ret;
 
-	if (device_may_wakeup(ec_dev->dev))
+	ret = cros_ec_get_next_event(ec_dev, &wake_event);
+
+	/*
+	 * Signal a wakeup only for wake-capable host events. wake_event keeps
+	 * its default of true if cros_ec_get_next_event() returned an error,
+	 * so in that case any interrupt signals a wakeup.
+	 */
+	if (wake_event && device_may_wakeup(ec_dev->dev))
 		pm_wakeup_event(ec_dev->dev, 0);
 
-	ret = cros_ec_get_next_event(ec_dev);
 	if (ret > 0)
 		blocking_notifier_call_chain(&ec_dev->event_notifier,
 					     0, ec_dev);
@@ -224,7 +231,7 @@ EXPORT_SYMBOL(cros_ec_suspend);
 
 static void cros_ec_drain_events(struct cros_ec_device *ec_dev)
 {
-	while (cros_ec_get_next_event(ec_dev) > 0)
+	while (cros_ec_get_next_event(ec_dev, NULL) > 0)
 		blocking_notifier_call_chain(&ec_dev->event_notifier,
 					     1, ec_dev);
 }

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 0cfac2d..8ac59dc 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c

@@ -637,6 +637,9 @@ static int mmc_blk_ioctl_multi_cmd(struct block_device *bdev,
 			   sizeof(num_of_cmds)))
 		return -EFAULT;
 
+	if (!num_of_cmds)
+		return 0;
+
 	if (num_of_cmds > MMC_IOC_MAX_CMDS)
 		return -EINVAL;
 
@@ -1182,7 +1185,7 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req)
 
 	switch (mq_rq->drv_op) {
 	case MMC_DRV_OP_IOCTL:
-		for (i = 0; i < mq_rq->ioc_count; i++) {
+		for (i = 0, ret = 0; i < mq_rq->ioc_count; i++) {
 			ret = __mmc_blk_ioctl_cmd(card, md, mq_rq->idata[i]);
 			if (ret)
 				break;
@@ -2167,6 +2170,7 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md)
 		 * from being accepted.
 		 */
 		card = md->queue.card;
+		blk_set_queue_dying(md->queue.queue);
 		mmc_cleanup_queue(&md->queue);
 		if (md->disk->flags & GENHD_FL_UP) {
 			device_remove_file(disk_to_dev(md->disk), &md->force_ro);

diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index cf66a3d..ac678e9 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c

@@ -45,6 +45,7 @@
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
 #include <asm/iosf_mbi.h>
+#include <linux/pci.h>
 #endif
 
 #include "sdhci.h"
@@ -134,6 +135,16 @@ static bool sdhci_acpi_byt(void)
 	return x86_match_cpu(byt);
 }
 
+/* Report whether the CPU is an Intel family 6 Airmont Atom. */
+static bool sdhci_acpi_cht(void)
+{
+	static const struct x86_cpu_id airmont_ids[] = {
+		{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
+		{}	/* terminator */
+	};
+	return x86_match_cpu(airmont_ids);
+}
+
 #define BYT_IOSF_SCCEP			0x63
 #define BYT_IOSF_OCP_NETCTRL0		0x1078
 #define BYT_IOSF_OCP_TIMEOUT_BASE	GENMASK(10, 8)
@@ -178,6 +189,45 @@ static bool sdhci_acpi_byt_defer(struct device *dev)
 	return false;
 }
 
+/* Is there a vendor:device with ACPI companion at @slot below @parent_slot? */
+static bool sdhci_acpi_cht_pci_wifi(unsigned int vendor, unsigned int device,
+				    unsigned int slot, unsigned int parent_slot)
+{
+	struct pci_dev *dev, *parent, *from = NULL;
+
+	while (1) {
+		/* pci_get_device() itself drops the reference held on @from. */
+		dev = pci_get_device(vendor, device, from);
+		if (!dev)
+			break;
+		parent = pci_upstream_bridge(dev);
+		if (ACPI_COMPANION(&dev->dev) && PCI_SLOT(dev->devfn) == slot &&
+		    parent && PCI_SLOT(parent->devfn) == parent_slot &&
+		    !pci_upstream_bridge(parent)) {
+			pci_dev_put(dev);
+			return true;
+		}
+		from = dev;
+	}
+	return false;
+}
+
+/*
+ * GPDwin uses PCI wifi which conflicts with SDIO's use of
+ * acpi_device_fix_up_power() on child device nodes. Identifying GPDwin is
+ * problematic, but since SDIO is only used for wifi, the presence of the PCI
+ * wifi card in the expected slot with an ACPI companion node, is used to
+ * indicate that acpi_device_fix_up_power() should be avoided.
+ */
+static inline bool sdhci_acpi_no_fixup_child_power(const char *hid,
+						   const char *uid)
+{
+	return sdhci_acpi_cht() &&
+	       !strcmp(hid, "80860F14") &&
+	       uid && !strcmp(uid, "2") &&	/* _UID may be absent (NULL) */
+	       sdhci_acpi_cht_pci_wifi(0x14e4, 0x43ec, 0, 28);
+}
+
 #else
 
 static inline void sdhci_acpi_byt_setting(struct device *dev)
@@ -189,6 +239,12 @@ static inline bool sdhci_acpi_byt_defer(struct device *dev)
 	return false;
 }
 
+static inline bool sdhci_acpi_no_fixup_child_power(const char *hid,
+						   const char *uid)
+{
+	return false;	/* stub for the #else branch: the quirk never applies */
+}
+
 #endif
 
 static int bxt_get_cd(struct mmc_host *mmc)
@@ -389,18 +445,20 @@ static int sdhci_acpi_probe(struct platform_device *pdev)
 	if (acpi_bus_get_device(handle, &device))
 		return -ENODEV;
 
+	hid = acpi_device_hid(device);
+	uid = device->pnp.unique_id;
+
 	/* Power on the SDHCI controller and its children */
 	acpi_device_fix_up_power(device);
-	list_for_each_entry(child, &device->children, node)
-		if (child->status.present && child->status.enabled)
-			acpi_device_fix_up_power(child);
+	if (!sdhci_acpi_no_fixup_child_power(hid, uid)) {
+		list_for_each_entry(child, &device->children, node)
+			if (child->status.present && child->status.enabled)
+				acpi_device_fix_up_power(child);
+	}
 
 	if (sdhci_acpi_byt_defer(dev))
 		return -EPROBE_DEFER;
 
-	hid = acpi_device_hid(device);
-	uid = device->pnp.unique_id;
-
 	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!iomem)
 		return -ENOMEM;

diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index 82b80d4..88a9435 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c

@@ -409,30 +409,29 @@ static void tmio_mmc_transfer_data(struct tmio_mmc_host *host,
 	 * Transfer the data
 	 */
 	if (host->pdata->flags & TMIO_MMC_32BIT_DATA_PORT) {
-		u8 data[4] = { };
+		u32 data = 0;
+		u32 *buf32 = (u32 *)buf;
 
 		if (is_read)
-			sd_ctrl_read32_rep(host, CTL_SD_DATA_PORT, (u32 *)buf,
+			sd_ctrl_read32_rep(host, CTL_SD_DATA_PORT, buf32,
 					   count >> 2);
 		else
-			sd_ctrl_write32_rep(host, CTL_SD_DATA_PORT, (u32 *)buf,
+			sd_ctrl_write32_rep(host, CTL_SD_DATA_PORT, buf32,
 					    count >> 2);
 
 		/* if count was multiple of 4 */
 		if (!(count & 0x3))
 			return;
 
-		buf8 = (u8 *)(buf + (count >> 2));
+		buf32 += count >> 2;
 		count %= 4;
 
 		if (is_read) {
-			sd_ctrl_read32_rep(host, CTL_SD_DATA_PORT,
-					   (u32 *)data, 1);
-			memcpy(buf8, data, count);
+			sd_ctrl_read32_rep(host, CTL_SD_DATA_PORT, &data, 1);
+			memcpy(buf32, &data, count);
 		} else {
-			memcpy(data, buf8, count);
-			sd_ctrl_write32_rep(host, CTL_SD_DATA_PORT,
-					    (u32 *)data, 1);
+			memcpy(&data, buf32, count);
+			sd_ctrl_write32_rep(host, CTL_SD_DATA_PORT, &data, 1);
 		}
 
 		return;

diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
index e15a973..9668616 100644
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c

@@ -1386,7 +1386,7 @@ static void wbsd_request_dma(struct wbsd_host *host, int dma)
 	 * order for ISA to be able to DMA to it.
 	 */
 	host->dma_buffer = kmalloc(WBSD_DMA_SIZE,
-		GFP_NOIO | GFP_DMA | __GFP_REPEAT | __GFP_NOWARN);
+		GFP_NOIO | GFP_DMA | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (!host->dma_buffer)
 		goto free;
 

diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index e83a279..5a2d717 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig

@@ -155,6 +155,10 @@
 	  This provides partitions parser for devices based on BCM47xx
 	  boards.
 
+menu "Partition parsers"
+source "drivers/mtd/parsers/Kconfig"
+endmenu
+
 comment "User Modules And Translation Layers"
 
 #

diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index 99bb9a1..151d60df 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile

@@ -13,6 +13,7 @@
 obj-$(CONFIG_MTD_AR7_PARTS)	+= ar7part.o
 obj-$(CONFIG_MTD_BCM63XX_PARTS)	+= bcm63xxpart.o
 obj-$(CONFIG_MTD_BCM47XX_PARTS)	+= bcm47xxpart.o
+obj-y				+= parsers/
 
 # 'Users' - code which presents functionality to userspace.
 obj-$(CONFIG_MTD_BLKDEVS)	+= mtd_blkdevs.o

diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c
index d10fa6c..fe2581d 100644
--- a/drivers/mtd/bcm47xxpart.c
+++ b/drivers/mtd/bcm47xxpart.c

@@ -43,7 +43,8 @@
 #define ML_MAGIC2			0x26594131
 #define TRX_MAGIC			0x30524448
 #define SHSQ_MAGIC			0x71736873	/* shsq (weird ZTE H218N endianness) */
-#define UBI_EC_MAGIC			0x23494255	/* UBI# */
+
+static const char * const trx_types[] = { "trx", NULL };
 
 struct trx_header {
 	uint32_t magic;
@@ -62,89 +63,6 @@ static void bcm47xxpart_add_part(struct mtd_partition *part, const char *name,
 	part->mask_flags = mask_flags;
 }
 
-static const char *bcm47xxpart_trx_data_part_name(struct mtd_info *master,
-						  size_t offset)
-{
-	uint32_t buf;
-	size_t bytes_read;
-	int err;
-
-	err  = mtd_read(master, offset, sizeof(buf), &bytes_read,
-			(uint8_t *)&buf);
-	if (err && !mtd_is_bitflip(err)) {
-		pr_err("mtd_read error while parsing (offset: 0x%X): %d\n",
-			offset, err);
-		goto out_default;
-	}
-
-	if (buf == UBI_EC_MAGIC)
-		return "ubi";
-
-out_default:
-	return "rootfs";
-}
-
-static int bcm47xxpart_parse_trx(struct mtd_info *master,
-				 struct mtd_partition *trx,
-				 struct mtd_partition *parts,
-				 size_t parts_len)
-{
-	struct trx_header header;
-	size_t bytes_read;
-	int curr_part = 0;
-	int i, err;
-
-	if (parts_len < 3) {
-		pr_warn("No enough space to add TRX partitions!\n");
-		return -ENOMEM;
-	}
-
-	err = mtd_read(master, trx->offset, sizeof(header), &bytes_read,
-		       (uint8_t *)&header);
-	if (err && !mtd_is_bitflip(err)) {
-		pr_err("mtd_read error while reading TRX header: %d\n", err);
-		return err;
-	}
-
-	i = 0;
-
-	/* We have LZMA loader if offset[2] points to sth */
-	if (header.offset[2]) {
-		bcm47xxpart_add_part(&parts[curr_part++], "loader",
-				     trx->offset + header.offset[i], 0);
-		i++;
-	}
-
-	if (header.offset[i]) {
-		bcm47xxpart_add_part(&parts[curr_part++], "linux",
-				     trx->offset + header.offset[i], 0);
-		i++;
-	}
-
-	if (header.offset[i]) {
-		size_t offset = trx->offset + header.offset[i];
-		const char *name = bcm47xxpart_trx_data_part_name(master,
-								  offset);
-
-		bcm47xxpart_add_part(&parts[curr_part++], name, offset, 0);
-		i++;
-	}
-
-	/*
-	 * Assume that every partition ends at the beginning of the one it is
-	 * followed by.
-	 */
-	for (i = 0; i < curr_part; i++) {
-		u64 next_part_offset = (i < curr_part - 1) ?
-					parts[i + 1].offset :
-					trx->offset + trx->size;
-
-		parts[i].size = next_part_offset - parts[i].offset;
-	}
-
-	return curr_part;
-}
-
 /**
  * bcm47xxpart_bootpartition - gets index of TRX partition used by bootloader
  *
@@ -362,17 +280,10 @@ static int bcm47xxpart_parse(struct mtd_info *master,
 	for (i = 0; i < trx_num; i++) {
 		struct mtd_partition *trx = &parts[trx_parts[i]];
 
-		if (i == bcm47xxpart_bootpartition()) {
-			int num_parts;
-
-			num_parts = bcm47xxpart_parse_trx(master, trx,
-							  parts + curr_part,
-							  BCM47XXPART_MAX_PARTS - curr_part);
-			if (num_parts > 0)
-				curr_part += num_parts;
-		} else {
+		if (i == bcm47xxpart_bootpartition())
+			trx->types = trx_types;
+		else
 			trx->name = "failsafe";
-		}
 	}
 
 	*pparts = parts;

diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c
index 94d3eb4..7d34296 100644
--- a/drivers/mtd/chips/cfi_cmdset_0020.c
+++ b/drivers/mtd/chips/cfi_cmdset_0020.c

@@ -666,7 +666,7 @@ cfi_staa_writev(struct mtd_info *mtd, const struct kvec *vecs,
 	size_t	 totlen = 0, thislen;
 	int	 ret = 0;
 	size_t	 buflen = 0;
-	static char *buffer;
+	char *buffer;
 
 	if (!ECCBUF_SIZE) {
 		/* We should fall back to a general writev implementation.

diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 58329d2..6def544 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig

@@ -95,6 +95,16 @@
 	  if you want to specify device partitioning or to use a device which
 	  doesn't support the JEDEC ID instruction.
 
+config MTD_MCHP23K256
+	tristate "Microchip 23K256 SRAM"
+	depends on SPI_MASTER
+	help
+	  This enables access to Microchip 23K256 SRAM chips, using SPI.
+
+	  Set up your spi devices with the right board-specific
+	  platform data, or a device tree description if you want to
+	  specify device partitioning
+
 config MTD_SPEAR_SMI
 	tristate "SPEAR MTD NOR Support through SMI controller"
 	depends on PLAT_SPEAR

diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile
index 7912d3a..f0f7676 100644
--- a/drivers/mtd/devices/Makefile
+++ b/drivers/mtd/devices/Makefile

@@ -12,6 +12,7 @@
 obj-$(CONFIG_MTD_BLOCK2MTD)	+= block2mtd.o
 obj-$(CONFIG_MTD_DATAFLASH)	+= mtd_dataflash.o
 obj-$(CONFIG_MTD_M25P80)	+= m25p80.o
+obj-$(CONFIG_MTD_MCHP23K256)	+= mchp23k256.o
 obj-$(CONFIG_MTD_SPEAR_SMI)	+= spear_smi.o
 obj-$(CONFIG_MTD_SST25L)	+= sst25l.o
 obj-$(CONFIG_MTD_BCM47XXSFLASH)	+= bcm47xxsflash.o

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index c4df3b1..00eea6f 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c

@@ -78,11 +78,17 @@ static ssize_t m25p80_write(struct spi_nor *nor, loff_t to, size_t len,
 {
 	struct m25p *flash = nor->priv;
 	struct spi_device *spi = flash->spi;
-	struct spi_transfer t[2] = {};
+	unsigned int inst_nbits, addr_nbits, data_nbits, data_idx;
+	struct spi_transfer t[3] = {};
 	struct spi_message m;
 	int cmd_sz = m25p_cmdsz(nor);
 	ssize_t ret;
 
+	/* get transfer protocols. */
+	inst_nbits = spi_nor_get_protocol_inst_nbits(nor->write_proto);
+	addr_nbits = spi_nor_get_protocol_addr_nbits(nor->write_proto);
+	data_nbits = spi_nor_get_protocol_data_nbits(nor->write_proto);
+
 	spi_message_init(&m);
 
 	if (nor->program_opcode == SPINOR_OP_AAI_WP && nor->sst_write_second)
@@ -92,12 +98,27 @@ static ssize_t m25p80_write(struct spi_nor *nor, loff_t to, size_t len,
 	m25p_addr2cmd(nor, to, flash->command);
 
 	t[0].tx_buf = flash->command;
+	t[0].tx_nbits = inst_nbits;
 	t[0].len = cmd_sz;
 	spi_message_add_tail(&t[0], &m);
 
-	t[1].tx_buf = buf;
-	t[1].len = len;
-	spi_message_add_tail(&t[1], &m);
+	/* split the op code and address bytes into two transfers if needed. */
+	data_idx = 1;
+	if (addr_nbits != inst_nbits) {
+		t[0].len = 1;
+
+		t[1].tx_buf = &flash->command[1];
+		t[1].tx_nbits = addr_nbits;
+		t[1].len = cmd_sz - 1;
+		spi_message_add_tail(&t[1], &m);
+
+		data_idx = 2;
+	}
+
+	t[data_idx].tx_buf = buf;
+	t[data_idx].tx_nbits = data_nbits;
+	t[data_idx].len = len;
+	spi_message_add_tail(&t[data_idx], &m);
 
 	ret = spi_sync(spi, &m);
 	if (ret)
@@ -109,18 +130,6 @@ static ssize_t m25p80_write(struct spi_nor *nor, loff_t to, size_t len,
 	return ret;
 }
 
-static inline unsigned int m25p80_rx_nbits(struct spi_nor *nor)
-{
-	switch (nor->flash_read) {
-	case SPI_NOR_DUAL:
-		return 2;
-	case SPI_NOR_QUAD:
-		return 4;
-	default:
-		return 0;
-	}
-}
-
 /*
  * Read an address range from the nor chip.  The address range
  * may be any size provided it is within the physical boundaries.
@@ -130,13 +139,20 @@ static ssize_t m25p80_read(struct spi_nor *nor, loff_t from, size_t len,
 {
 	struct m25p *flash = nor->priv;
 	struct spi_device *spi = flash->spi;
-	struct spi_transfer t[2];
+	unsigned int inst_nbits, addr_nbits, data_nbits, data_idx;
+	struct spi_transfer t[3];
 	struct spi_message m;
 	unsigned int dummy = nor->read_dummy;
 	ssize_t ret;
+	int cmd_sz;
+
+	/* get transfer protocols. */
+	inst_nbits = spi_nor_get_protocol_inst_nbits(nor->read_proto);
+	addr_nbits = spi_nor_get_protocol_addr_nbits(nor->read_proto);
+	data_nbits = spi_nor_get_protocol_data_nbits(nor->read_proto);
 
 	/* convert the dummy cycles to the number of bytes */
-	dummy /= 8;
+	dummy = (dummy * addr_nbits) / 8;
 
 	if (spi_flash_read_supported(spi)) {
 		struct spi_flash_read_message msg;
@@ -149,10 +165,9 @@ static ssize_t m25p80_read(struct spi_nor *nor, loff_t from, size_t len,
 		msg.read_opcode = nor->read_opcode;
 		msg.addr_width = nor->addr_width;
 		msg.dummy_bytes = dummy;
-		/* TODO: Support other combinations */
-		msg.opcode_nbits = SPI_NBITS_SINGLE;
-		msg.addr_nbits = SPI_NBITS_SINGLE;
-		msg.data_nbits = m25p80_rx_nbits(nor);
+		msg.opcode_nbits = inst_nbits;
+		msg.addr_nbits = addr_nbits;
+		msg.data_nbits = data_nbits;
 
 		ret = spi_flash_read(spi, &msg);
 		if (ret < 0)
@@ -167,20 +182,45 @@ static ssize_t m25p80_read(struct spi_nor *nor, loff_t from, size_t len,
 	m25p_addr2cmd(nor, from, flash->command);
 
 	t[0].tx_buf = flash->command;
+	t[0].tx_nbits = inst_nbits;
 	t[0].len = m25p_cmdsz(nor) + dummy;
 	spi_message_add_tail(&t[0], &m);
 
-	t[1].rx_buf = buf;
-	t[1].rx_nbits = m25p80_rx_nbits(nor);
-	t[1].len = min3(len, spi_max_transfer_size(spi),
-			spi_max_message_size(spi) - t[0].len);
-	spi_message_add_tail(&t[1], &m);
+	/*
+	 * Set all dummy/mode cycle bits to avoid sending some manufacturer
+	 * specific pattern, which might make the memory enter its Continuous
+	 * Read mode by mistake.
+	 * Based on the different mode cycle bit patterns listed and described
+	 * in the JESD216B specification, the 0xff value works for all memories
+	 * and all manufacturers.
+	 */
+	cmd_sz = t[0].len;
+	memset(flash->command + cmd_sz - dummy, 0xff, dummy);
+
+	/* split the op code and address bytes into two transfers if needed. */
+	data_idx = 1;
+	if (addr_nbits != inst_nbits) {
+		t[0].len = 1;
+
+		t[1].tx_buf = &flash->command[1];
+		t[1].tx_nbits = addr_nbits;
+		t[1].len = cmd_sz - 1;
+		spi_message_add_tail(&t[1], &m);
+
+		data_idx = 2;
+	}
+
+	t[data_idx].rx_buf = buf;
+	t[data_idx].rx_nbits = data_nbits;
+	t[data_idx].len = min3(len, spi_max_transfer_size(spi),
+			       spi_max_message_size(spi) - cmd_sz);
+	spi_message_add_tail(&t[data_idx], &m);
 
 	ret = spi_sync(spi, &m);
 	if (ret)
 		return ret;
 
-	ret = m.actual_length - m25p_cmdsz(nor) - dummy;
+	ret = m.actual_length - cmd_sz;
 	if (ret < 0)
 		return -EIO;
 	return ret;
@@ -196,7 +236,11 @@ static int m25p_probe(struct spi_device *spi)
 	struct flash_platform_data	*data;
 	struct m25p *flash;
 	struct spi_nor *nor;
-	enum read_mode mode = SPI_NOR_NORMAL;
+	struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_PP,
+	};
 	char *flash_name;
 	int ret;
 
@@ -221,10 +265,19 @@ static int m25p_probe(struct spi_device *spi)
 	spi_set_drvdata(spi, flash);
 	flash->spi = spi;
 
-	if (spi->mode & SPI_RX_QUAD)
-		mode = SPI_NOR_QUAD;
-	else if (spi->mode & SPI_RX_DUAL)
-		mode = SPI_NOR_DUAL;
+	if (spi->mode & SPI_RX_QUAD) {
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
+
+		if (spi->mode & SPI_TX_QUAD)
+			hwcaps.mask |= (SNOR_HWCAPS_READ_1_4_4 |
+					SNOR_HWCAPS_PP_1_1_4 |
+					SNOR_HWCAPS_PP_1_4_4);
+	} else if (spi->mode & SPI_RX_DUAL) {
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
+
+		if (spi->mode & SPI_TX_DUAL)
+			hwcaps.mask |= SNOR_HWCAPS_READ_1_2_2;
+	}
 
 	if (data && data->name)
 		nor->mtd.name = data->name;
@@ -241,7 +294,7 @@ static int m25p_probe(struct spi_device *spi)
 	else
 		flash_name = spi->modalias;
 
-	ret = spi_nor_scan(nor, flash_name, mode);
+	ret = spi_nor_scan(nor, flash_name, &hwcaps);
 	if (ret)
 		return ret;
 

diff --git a/drivers/mtd/devices/mchp23k256.c b/drivers/mtd/devices/mchp23k256.c
new file mode 100644
index 0000000..8956b7d
--- /dev/null
+++ b/drivers/mtd/devices/mchp23k256.c

@@ -0,0 +1,236 @@
+/*
+ * mchp23k256.c
+ *
+ * Driver for Microchip 23k256 SPI RAM chips
+ *
+ * Copyright © 2016 Andrew Lunn <andrew@lunn.ch>
+ *
+ * This code is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/sizes.h>
+#include <linux/spi/flash.h>
+#include <linux/spi/spi.h>
+#include <linux/of_device.h>
+
+#define MAX_CMD_SIZE		4
+
+/**
+ * struct mchp23_caps - per-chip parameters
+ * @addr_width: number of address bytes that follow the opcode in a command
+ * @size: value reported as the size of the MTD device
+ */
+struct mchp23_caps {
+	u8 addr_width;
+	unsigned int size;
+};
+
+/**
+ * struct mchp23k256_flash - driver state for one SRAM chip
+ * @spi: SPI device the chip is attached to
+ * @lock: held while a read or write SPI message is in flight
+ * @mtd: MTD device registered for the chip
+ * @caps: address width and size of the chip
+ */
+struct mchp23k256_flash {
+	struct spi_device	*spi;
+	struct mutex		lock;
+	struct mtd_info		mtd;
+	const struct mchp23_caps	*caps;
+};
+
+#define MCHP23K256_CMD_WRITE_STATUS	0x01
+#define MCHP23K256_CMD_WRITE		0x02
+#define MCHP23K256_CMD_READ		0x03
+#define MCHP23K256_MODE_SEQ		BIT(6)
+
+#define to_mchp23k256_flash(x) container_of(x, struct mchp23k256_flash, mtd)
+
+/*
+ * Store @addr into @cmd[1] .. @cmd[addr_width], most significant byte
+ * first. @cmd[0] is reserved for the opcode and is not touched here.
+ */
+static void mchp23k256_addr2cmd(struct mchp23k256_flash *flash,
+				unsigned int addr, u8 *cmd)
+{
+	int pos = flash->caps->addr_width;
+
+	/* Fill from the last address byte backwards, low byte first. */
+	while (pos > 0) {
+		cmd[pos] = addr;
+		addr >>= 8;
+		pos--;
+	}
+}
+
+/* Length of a command in bytes: the address bytes plus one opcode byte. */
+static int mchp23k256_cmdsz(struct mchp23k256_flash *flash)
+{
+	const struct mchp23_caps *caps = flash->caps;
+
+	return caps->addr_width + 1;
+}
+
+/*
+ * Write @len bytes from @buf to the SRAM starting at offset @to.
+ *
+ * The opcode and address bytes are sent first, followed by the data, in a
+ * single SPI message. On success the number of data bytes transferred is
+ * added to *@retlen (when @retlen is not NULL) and 0 is returned. If the
+ * SPI transfer fails, its error code is returned and *@retlen is left
+ * untouched.
+ */
+static int mchp23k256_write(struct mtd_info *mtd, loff_t to, size_t len,
+			    size_t *retlen, const unsigned char *buf)
+{
+	struct mchp23k256_flash *flash = to_mchp23k256_flash(mtd);
+	struct spi_transfer transfer[2] = {};
+	struct spi_message message;
+	unsigned char command[MAX_CMD_SIZE];
+	int cmd_len = mchp23k256_cmdsz(flash);
+	int ret;
+
+	spi_message_init(&message);
+
+	command[0] = MCHP23K256_CMD_WRITE;
+	mchp23k256_addr2cmd(flash, to, command);
+
+	transfer[0].tx_buf = command;
+	transfer[0].len = cmd_len;
+	spi_message_add_tail(&transfer[0], &message);
+
+	transfer[1].tx_buf = buf;
+	transfer[1].len = len;
+	spi_message_add_tail(&transfer[1], &message);
+
+	mutex_lock(&flash->lock);
+	ret = spi_sync(flash->spi, &message);
+	mutex_unlock(&flash->lock);
+
+	if (ret)
+		return ret;
+
+	/*
+	 * Only the opcode + address bytes actually sent are overhead;
+	 * sizeof(command) would over-count on chips with a short address.
+	 */
+	if (retlen && message.actual_length > cmd_len)
+		*retlen += message.actual_length - cmd_len;
+
+	return 0;
+}
+
+/*
+ * Read @len bytes from the SRAM starting at offset @from into @buf.
+ *
+ * The opcode and address bytes are sent first, then the data is clocked in,
+ * all in a single SPI message. On success the number of data bytes
+ * transferred is added to *@retlen (when @retlen is not NULL) and 0 is
+ * returned. If the SPI transfer fails, its error code is returned and
+ * *@retlen is left untouched.
+ */
+static int mchp23k256_read(struct mtd_info *mtd, loff_t from, size_t len,
+			   size_t *retlen, unsigned char *buf)
+{
+	struct mchp23k256_flash *flash = to_mchp23k256_flash(mtd);
+	struct spi_transfer transfer[2] = {};
+	struct spi_message message;
+	unsigned char command[MAX_CMD_SIZE];
+	int cmd_len = mchp23k256_cmdsz(flash);
+	int ret;
+
+	spi_message_init(&message);
+
+	command[0] = MCHP23K256_CMD_READ;
+	mchp23k256_addr2cmd(flash, from, command);
+
+	transfer[0].tx_buf = command;
+	transfer[0].len = cmd_len;
+	spi_message_add_tail(&transfer[0], &message);
+
+	transfer[1].rx_buf = buf;
+	transfer[1].len = len;
+	spi_message_add_tail(&transfer[1], &message);
+
+	mutex_lock(&flash->lock);
+	ret = spi_sync(flash->spi, &message);
+	mutex_unlock(&flash->lock);
+
+	if (ret)
+		return ret;
+
+	/*
+	 * Only the opcode + address bytes actually sent are overhead;
+	 * sizeof(command) would over-count on chips with a short address.
+	 */
+	if (retlen && message.actual_length > cmd_len)
+		*retlen += message.actual_length - cmd_len;
+
+	return 0;
+}
+
+/*
+ * Put the chip into sequential mode by sending a write-status command
+ * carrying MCHP23K256_MODE_SEQ, so that a single read or write operation
+ * can cover the entire SRAM.
+ *
+ * Returns the result of spi_sync().
+ */
+static int mchp23k256_set_mode(struct spi_device *spi)
+{
+	unsigned char command[2] = {
+		MCHP23K256_CMD_WRITE_STATUS,
+		MCHP23K256_MODE_SEQ,
+	};
+	struct spi_transfer transfer = {
+		.tx_buf = command,
+		.len = sizeof(command),
+	};
+	struct spi_message message;
+
+	spi_message_init(&message);
+	spi_message_add_tail(&transfer, &message);
+
+	return spi_sync(spi, &message);
+}
+
+/* 23K256: 32 KiB, two address bytes per command. */
+static const struct mchp23_caps mchp23k256_caps = {
+	.addr_width = 2,
+	.size = SZ_32K,
+};
+
+/* 23LCV1024: 128 KiB, three address bytes per command. */
+static const struct mchp23_caps mchp23lcv1024_caps = {
+	.addr_width = 3,
+	.size = SZ_128K,
+};
+
+/*
+ * Bind to an SPI device: allocate the driver state, switch the chip to
+ * sequential mode, pick the chip parameters and register the MTD device,
+ * using the partitions from platform data when that is present.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int mchp23k256_probe(struct spi_device *spi)
+{
+	struct flash_platform_data *pdata = dev_get_platdata(&spi->dev);
+	struct mchp23k256_flash *flash;
+	struct mtd_info *mtd;
+	int ret;
+
+	flash = devm_kzalloc(&spi->dev, sizeof(*flash), GFP_KERNEL);
+	if (!flash)
+		return -ENOMEM;
+
+	mutex_init(&flash->lock);
+	flash->spi = spi;
+	spi_set_drvdata(spi, flash);
+
+	ret = mchp23k256_set_mode(spi);
+	if (ret)
+		return ret;
+
+	/* Fall back to the 23K256 parameters when there is no match data. */
+	flash->caps = of_device_get_match_data(&spi->dev);
+	if (!flash->caps)
+		flash->caps = &mchp23k256_caps;
+
+	mtd = &flash->mtd;
+	mtd_set_of_node(mtd, spi->dev.of_node);
+	mtd->dev.parent = &spi->dev;
+	mtd->type = MTD_RAM;
+	mtd->flags = MTD_CAP_RAM;
+	mtd->writesize = 1;
+	mtd->size = flash->caps->size;
+	mtd->_read = mchp23k256_read;
+	mtd->_write = mchp23k256_write;
+
+	if (pdata)
+		return mtd_device_register(mtd, pdata->parts, pdata->nr_parts);
+
+	return mtd_device_register(mtd, NULL, 0);
+}
+
+/* Unbind from the SPI device by unregistering the MTD device. */
+static int mchp23k256_remove(struct spi_device *spi)
+{
+	struct mchp23k256_flash *priv = spi_get_drvdata(spi);
+	struct mtd_info *mtd = &priv->mtd;
+
+	return mtd_device_unregister(mtd);
+}
+
+/*
+ * Device tree match table. Each entry's .data points at the chip
+ * parameters that probe picks up through of_device_get_match_data().
+ */
+static const struct of_device_id mchp23k256_of_table[] = {
+	{
+		.compatible = "microchip,mchp23k256",
+		.data = &mchp23k256_caps,
+	},
+	{
+		.compatible = "microchip,mchp23lcv1024",
+		.data = &mchp23lcv1024_caps,
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(of, mchp23k256_of_table);
+
+/* SPI driver definition tying the probe/remove callbacks to the OF table. */
+static struct spi_driver mchp23k256_driver = {
+	.driver = {
+		.name	= "mchp23k256",
+		.of_match_table = of_match_ptr(mchp23k256_of_table),
+	},
+	.probe		= mchp23k256_probe,
+	.remove		= mchp23k256_remove,
+};
+
+module_spi_driver(mchp23k256_driver);
+
+MODULE_DESCRIPTION("MTD SPI driver for MCHP23K256 RAM chips");
+/* E-mail address matches the copyright notice at the top of this file. */
+MODULE_AUTHOR("Andrew Lunn <andrew@lunn.ch>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("spi:mchp23k256");

diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index f9e9bd1..5dc8bd0 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c

@@ -82,9 +82,13 @@
 #define OP_WRITE_SECURITY_REVC	0x9A
 #define OP_WRITE_SECURITY	0x9B	/* revision D */
 
+#define CFI_MFR_ATMEL		0x1F
+
+#define DATAFLASH_SHIFT_EXTID	24
+#define DATAFLASH_SHIFT_ID	40
 
 struct dataflash {
-	uint8_t			command[4];
+	u8			command[4];
 	char			name[24];
 
 	unsigned short		page_offset;	/* offset in flash address */
@@ -129,8 +133,7 @@ static int dataflash_waitready(struct spi_device *spi)
 	for (;;) {
 		status = dataflash_status(spi);
 		if (status < 0) {
-			pr_debug("%s: status %d?\n",
-					dev_name(&spi->dev), status);
+			dev_dbg(&spi->dev, "status %d?\n", status);
 			status = 0;
 		}
 
@@ -153,12 +156,11 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr)
 	struct spi_transfer	x = { };
 	struct spi_message	msg;
 	unsigned		blocksize = priv->page_size << 3;
-	uint8_t			*command;
-	uint32_t		rem;
+	u8			*command;
+	u32			rem;
 
-	pr_debug("%s: erase addr=0x%llx len 0x%llx\n",
-	      dev_name(&spi->dev), (long long)instr->addr,
-	      (long long)instr->len);
+	dev_dbg(&spi->dev, "erase addr=0x%llx len 0x%llx\n",
+		(long long)instr->addr, (long long)instr->len);
 
 	div_u64_rem(instr->len, priv->page_size, &rem);
 	if (rem)
@@ -187,11 +189,11 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr)
 		pageaddr = pageaddr << priv->page_offset;
 
 		command[0] = do_block ? OP_ERASE_BLOCK : OP_ERASE_PAGE;
-		command[1] = (uint8_t)(pageaddr >> 16);
-		command[2] = (uint8_t)(pageaddr >> 8);
+		command[1] = (u8)(pageaddr >> 16);
+		command[2] = (u8)(pageaddr >> 8);
 		command[3] = 0;
 
-		pr_debug("ERASE %s: (%x) %x %x %x [%i]\n",
+		dev_dbg(&spi->dev, "ERASE %s: (%x) %x %x %x [%i]\n",
 			do_block ? "block" : "page",
 			command[0], command[1], command[2], command[3],
 			pageaddr);
@@ -200,8 +202,8 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr)
 		(void) dataflash_waitready(spi);
 
 		if (status < 0) {
-			printk(KERN_ERR "%s: erase %x, err %d\n",
-				dev_name(&spi->dev), pageaddr, status);
+			dev_err(&spi->dev, "erase %x, err %d\n",
+				pageaddr, status);
 			/* REVISIT:  can retry instr->retries times; or
 			 * giveup and instr->fail_addr = instr->addr;
 			 */
@@ -239,11 +241,11 @@ static int dataflash_read(struct mtd_info *mtd, loff_t from, size_t len,
 	struct spi_transfer	x[2] = { };
 	struct spi_message	msg;
 	unsigned int		addr;
-	uint8_t			*command;
+	u8			*command;
 	int			status;
 
-	pr_debug("%s: read 0x%x..0x%x\n", dev_name(&priv->spi->dev),
-			(unsigned)from, (unsigned)(from + len));
+	dev_dbg(&priv->spi->dev, "read 0x%x..0x%x\n",
+		  (unsigned int)from, (unsigned int)(from + len));
 
 	/* Calculate flash page/byte address */
 	addr = (((unsigned)from / priv->page_size) << priv->page_offset)
@@ -251,7 +253,7 @@ static int dataflash_read(struct mtd_info *mtd, loff_t from, size_t len,
 
 	command = priv->command;
 
-	pr_debug("READ: (%x) %x %x %x\n",
+	dev_dbg(&priv->spi->dev, "READ: (%x) %x %x %x\n",
 		command[0], command[1], command[2], command[3]);
 
 	spi_message_init(&msg);
@@ -271,9 +273,9 @@ static int dataflash_read(struct mtd_info *mtd, loff_t from, size_t len,
 	 * fewer "don't care" bytes.  Both buffers stay unchanged.
 	 */
 	command[0] = OP_READ_CONTINUOUS;
-	command[1] = (uint8_t)(addr >> 16);
-	command[2] = (uint8_t)(addr >> 8);
-	command[3] = (uint8_t)(addr >> 0);
+	command[1] = (u8)(addr >> 16);
+	command[2] = (u8)(addr >> 8);
+	command[3] = (u8)(addr >> 0);
 	/* plus 4 "don't care" bytes */
 
 	status = spi_sync(priv->spi, &msg);
@@ -283,8 +285,7 @@ static int dataflash_read(struct mtd_info *mtd, loff_t from, size_t len,
 		*retlen = msg.actual_length - 8;
 		status = 0;
 	} else
-		pr_debug("%s: read %x..%x --> %d\n",
-			dev_name(&priv->spi->dev),
+		dev_dbg(&priv->spi->dev, "read %x..%x --> %d\n",
 			(unsigned)from, (unsigned)(from + len),
 			status);
 	return status;
@@ -308,10 +309,10 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 	size_t			remaining = len;
 	u_char			*writebuf = (u_char *) buf;
 	int			status = -EINVAL;
-	uint8_t			*command;
+	u8			*command;
 
-	pr_debug("%s: write 0x%x..0x%x\n",
-		dev_name(&spi->dev), (unsigned)to, (unsigned)(to + len));
+	dev_dbg(&spi->dev, "write 0x%x..0x%x\n",
+		(unsigned int)to, (unsigned int)(to + len));
 
 	spi_message_init(&msg);
 
@@ -328,7 +329,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 
 	mutex_lock(&priv->lock);
 	while (remaining > 0) {
-		pr_debug("write @ %i:%i len=%i\n",
+		dev_dbg(&spi->dev, "write @ %i:%i len=%i\n",
 			pageaddr, offset, writelen);
 
 		/* REVISIT:
@@ -356,13 +357,13 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 			command[2] = (addr & 0x0000FF00) >> 8;
 			command[3] = 0;
 
-			pr_debug("TRANSFER: (%x) %x %x %x\n",
+			dev_dbg(&spi->dev, "TRANSFER: (%x) %x %x %x\n",
 				command[0], command[1], command[2], command[3]);
 
 			status = spi_sync(spi, &msg);
 			if (status < 0)
-				pr_debug("%s: xfer %u -> %d\n",
-					dev_name(&spi->dev), addr, status);
+				dev_dbg(&spi->dev, "xfer %u -> %d\n",
+					addr, status);
 
 			(void) dataflash_waitready(priv->spi);
 		}
@@ -374,7 +375,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 		command[2] = (addr & 0x0000FF00) >> 8;
 		command[3] = (addr & 0x000000FF);
 
-		pr_debug("PROGRAM: (%x) %x %x %x\n",
+		dev_dbg(&spi->dev, "PROGRAM: (%x) %x %x %x\n",
 			command[0], command[1], command[2], command[3]);
 
 		x[1].tx_buf = writebuf;
@@ -383,8 +384,8 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 		status = spi_sync(spi, &msg);
 		spi_transfer_del(x + 1);
 		if (status < 0)
-			pr_debug("%s: pgm %u/%u -> %d\n",
-				dev_name(&spi->dev), addr, writelen, status);
+			dev_dbg(&spi->dev, "pgm %u/%u -> %d\n",
+				addr, writelen, status);
 
 		(void) dataflash_waitready(priv->spi);
 
@@ -398,20 +399,20 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 		command[2] = (addr & 0x0000FF00) >> 8;
 		command[3] = 0;
 
-		pr_debug("COMPARE: (%x) %x %x %x\n",
+		dev_dbg(&spi->dev, "COMPARE: (%x) %x %x %x\n",
 			command[0], command[1], command[2], command[3]);
 
 		status = spi_sync(spi, &msg);
 		if (status < 0)
-			pr_debug("%s: compare %u -> %d\n",
-				dev_name(&spi->dev), addr, status);
+			dev_dbg(&spi->dev, "compare %u -> %d\n",
+				addr, status);
 
 		status = dataflash_waitready(priv->spi);
 
 		/* Check result of the compare operation */
 		if (status & (1 << 6)) {
-			printk(KERN_ERR "%s: compare page %u, err %d\n",
-				dev_name(&spi->dev), pageaddr, status);
+			dev_err(&spi->dev, "compare page %u, err %d\n",
+				pageaddr, status);
 			remaining = 0;
 			status = -EIO;
 			break;
@@ -455,11 +456,11 @@ static int dataflash_get_otp_info(struct mtd_info *mtd, size_t len,
 }
 
 static ssize_t otp_read(struct spi_device *spi, unsigned base,
-		uint8_t *buf, loff_t off, size_t len)
+		u8 *buf, loff_t off, size_t len)
 {
 	struct spi_message	m;
 	size_t			l;
-	uint8_t			*scratch;
+	u8			*scratch;
 	struct spi_transfer	t;
 	int			status;
 
@@ -538,7 +539,7 @@ static int dataflash_write_user_otp(struct mtd_info *mtd,
 {
 	struct spi_message	m;
 	const size_t		l = 4 + 64;
-	uint8_t			*scratch;
+	u8			*scratch;
 	struct spi_transfer	t;
 	struct dataflash	*priv = mtd->priv;
 	int			status;
@@ -689,14 +690,15 @@ struct flash_info {
 	/* JEDEC id has a high byte of zero plus three data bytes:
 	 * the manufacturer id, then a two byte device id.
 	 */
-	uint32_t	jedec_id;
+	u64		jedec_id;
 
 	/* The size listed here is what works with OP_ERASE_PAGE. */
 	unsigned	nr_pages;
-	uint16_t	pagesize;
-	uint16_t	pageoffset;
+	u16		pagesize;
+	u16		pageoffset;
 
-	uint16_t	flags;
+	u16		flags;
+#define SUP_EXTID	0x0004		/* supports extended ID data */
 #define SUP_POW2PS	0x0002		/* supports 2^N byte pages */
 #define IS_POW2PS	0x0001		/* uses 2^N byte pages */
 };
@@ -734,54 +736,32 @@ static struct flash_info dataflash_data[] = {
 
 	{ "AT45DB642x",  0x1f2800, 8192, 1056, 11, SUP_POW2PS},
 	{ "at45db642d",  0x1f2800, 8192, 1024, 10, SUP_POW2PS | IS_POW2PS},
+
+	{ "AT45DB641E",  0x1f28000100, 32768, 264, 9, SUP_EXTID | SUP_POW2PS},
+	{ "at45db641e",  0x1f28000100, 32768, 256, 8, SUP_EXTID | SUP_POW2PS | IS_POW2PS},
 };
 
-static struct flash_info *jedec_probe(struct spi_device *spi)
+static struct flash_info *jedec_lookup(struct spi_device *spi,
+				       u64 jedec, bool use_extid)
 {
-	int			tmp;
-	uint8_t			code = OP_READ_ID;
-	uint8_t			id[3];
-	uint32_t		jedec;
-	struct flash_info	*info;
+	struct flash_info *info;
 	int status;
 
-	/* JEDEC also defines an optional "extended device information"
-	 * string for after vendor-specific data, after the three bytes
-	 * we use here.  Supporting some chips might require using it.
-	 *
-	 * If the vendor ID isn't Atmel's (0x1f), assume this call failed.
-	 * That's not an error; only rev C and newer chips handle it, and
-	 * only Atmel sells these chips.
-	 */
-	tmp = spi_write_then_read(spi, &code, 1, id, 3);
-	if (tmp < 0) {
-		pr_debug("%s: error %d reading JEDEC ID\n",
-			dev_name(&spi->dev), tmp);
-		return ERR_PTR(tmp);
-	}
-	if (id[0] != 0x1f)
-		return NULL;
+	for (info = dataflash_data;
+	     info < dataflash_data + ARRAY_SIZE(dataflash_data);
+	     info++) {
+		if (use_extid && !(info->flags & SUP_EXTID))
+			continue;
 
-	jedec = id[0];
-	jedec = jedec << 8;
-	jedec |= id[1];
-	jedec = jedec << 8;
-	jedec |= id[2];
-
-	for (tmp = 0, info = dataflash_data;
-			tmp < ARRAY_SIZE(dataflash_data);
-			tmp++, info++) {
 		if (info->jedec_id == jedec) {
-			pr_debug("%s: OTP, sector protect%s\n",
-				dev_name(&spi->dev),
-				(info->flags & SUP_POW2PS)
-					? ", binary pagesize" : ""
-				);
+			dev_dbg(&spi->dev, "OTP, sector protect%s\n",
+				(info->flags & SUP_POW2PS) ?
+				", binary pagesize" : "");
 			if (info->flags & SUP_POW2PS) {
 				status = dataflash_status(spi);
 				if (status < 0) {
-					pr_debug("%s: status error %d\n",
-						dev_name(&spi->dev), status);
+					dev_dbg(&spi->dev, "status error %d\n",
+						status);
 					return ERR_PTR(status);
 				}
 				if (status & 0x1) {
@@ -796,12 +776,58 @@ static struct flash_info *jedec_probe(struct spi_device *spi)
 		}
 	}
 
+	return ERR_PTR(-ENODEV);
+}
+
+static struct flash_info *jedec_probe(struct spi_device *spi)
+{
+	int ret;
+	u8 code = OP_READ_ID;
+	u64 jedec;
+	u8 id[sizeof(jedec)] = {0};
+	const unsigned int id_size = 5;
+	struct flash_info *info;
+
+	/*
+	 * JEDEC also defines an optional "extended device information"
+	 * string for after vendor-specific data, after the three bytes
+	 * we use here.  Supporting some chips might require using it.
+	 *
+	 * If the vendor ID isn't Atmel's (0x1f), assume this call failed.
+	 * That's not an error; only rev C and newer chips handle it, and
+	 * only Atmel sells these chips.
+	 */
+	ret = spi_write_then_read(spi, &code, 1, id, id_size);
+	if (ret < 0) {
+		dev_dbg(&spi->dev, "error %d reading JEDEC ID\n", ret);
+		return ERR_PTR(ret);
+	}
+
+	if (id[0] != CFI_MFR_ATMEL)
+		return NULL;
+
+	jedec = be64_to_cpup((__be64 *)id);
+
+	/*
+	 * First, try to match device using extended device
+	 * information
+	 */
+	info = jedec_lookup(spi, jedec >> DATAFLASH_SHIFT_EXTID, true);
+	if (!IS_ERR(info))
+		return info;
+	/*
+	 * If that fails, make another pass using regular ID
+	 * information
+	 */
+	info = jedec_lookup(spi, jedec >> DATAFLASH_SHIFT_ID, false);
+	if (!IS_ERR(info))
+		return info;
 	/*
 	 * Treat other chips as errors ... we won't know the right page
 	 * size (it might be binary) even when we can tell which density
 	 * class is involved (legacy chip id scheme).
 	 */
-	dev_warn(&spi->dev, "JEDEC id %06x not handled\n", jedec);
+	dev_warn(&spi->dev, "JEDEC id %016llx not handled\n", jedec);
 	return ERR_PTR(-ENODEV);
 }
 
@@ -845,8 +871,7 @@ static int dataflash_probe(struct spi_device *spi)
 	 */
 	status = dataflash_status(spi);
 	if (status <= 0 || status == 0xff) {
-		pr_debug("%s: status error %d\n",
-				dev_name(&spi->dev), status);
+		dev_dbg(&spi->dev, "status error %d\n", status);
 		if (status == 0 || status == 0xff)
 			status = -ENODEV;
 		return status;
@@ -887,8 +912,7 @@ static int dataflash_probe(struct spi_device *spi)
 	}
 
 	if (status < 0)
-		pr_debug("%s: add_dataflash --> %d\n", dev_name(&spi->dev),
-				status);
+		dev_dbg(&spi->dev, "add_dataflash --> %d\n", status);
 
 	return status;
 }
@@ -898,7 +922,7 @@ static int dataflash_remove(struct spi_device *spi)
 	struct dataflash	*flash = spi_get_drvdata(spi);
 	int			status;
 
-	pr_debug("%s: remove\n", dev_name(&spi->dev));
+	dev_dbg(&spi->dev, "remove\n");
 
 	status = mtd_device_unregister(&flash->mtd);
 	if (status == 0)

diff --git a/drivers/mtd/devices/serial_flash_cmds.h b/drivers/mtd/devices/serial_flash_cmds.h
index 8b81e15..eba125c 100644
--- a/drivers/mtd/devices/serial_flash_cmds.h
+++ b/drivers/mtd/devices/serial_flash_cmds.h

@@ -13,7 +13,6 @@
 #define _MTD_SERIAL_FLASH_CMDS_H
 
 /* Generic Flash Commands/OPCODEs */
-#define SPINOR_OP_RDSR2		0x35
 #define SPINOR_OP_WRVCR		0x81
 #define SPINOR_OP_RDVCR		0x85
 

diff --git a/drivers/mtd/devices/st_spi_fsm.c b/drivers/mtd/devices/st_spi_fsm.c
index 804313a..21afd94 100644
--- a/drivers/mtd/devices/st_spi_fsm.c
+++ b/drivers/mtd/devices/st_spi_fsm.c

@@ -1445,7 +1445,7 @@ static int stfsm_s25fl_config(struct stfsm *fsm)
 	}
 
 	/* Check status of 'QE' bit, update if required. */
-	stfsm_read_status(fsm, SPINOR_OP_RDSR2, &cr1, 1);
+	stfsm_read_status(fsm, SPINOR_OP_RDCR, &cr1, 1);
 	data_pads = ((fsm->stfsm_seq_read.seq_cfg >> 16) & 0x3) + 1;
 	if (data_pads == 4) {
 		if (!(cr1 & STFSM_S25FL_CONFIG_QE)) {
@@ -1490,7 +1490,7 @@ static int stfsm_w25q_config(struct stfsm *fsm)
 		return ret;
 
 	/* Check status of 'QE' bit, update if required. */
-	stfsm_read_status(fsm, SPINOR_OP_RDSR2, &sr2, 1);
+	stfsm_read_status(fsm, SPINOR_OP_RDCR, &sr2, 1);
 	data_pads = ((fsm->stfsm_seq_read.seq_cfg >> 16) & 0x3) + 1;
 	if (data_pads == 4) {
 		if (!(sr2 & W25Q_STATUS_QE)) {

diff --git a/drivers/mtd/maps/physmap_of_gemini.c b/drivers/mtd/maps/physmap_of_gemini.c
index 9d371cd..05b286b 100644
--- a/drivers/mtd/maps/physmap_of_gemini.c
+++ b/drivers/mtd/maps/physmap_of_gemini.c

@@ -59,7 +59,7 @@ int of_flash_probe_gemini(struct platform_device *pdev,
 			  struct device_node *np,
 			  struct map_info *map)
 {
-	static struct regmap *rmap;
+	struct regmap *rmap;
 	struct device *dev = &pdev->dev;
 	u32 val;
 	int ret;

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 1517da3..956382c 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c

@@ -991,7 +991,7 @@ EXPORT_SYMBOL_GPL(mtd_point);
 /* We probably shouldn't allow XIP if the unpoint isn't a NULL */
 int mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
 {
-	if (!mtd->_point)
+	if (!mtd->_unpoint)
 		return -EOPNOTSUPP;
 	if (from < 0 || from >= mtd->size || len > mtd->size - from)
 		return -EINVAL;

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index ea5e530..5736b0c 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c

@@ -37,10 +37,16 @@
 static LIST_HEAD(mtd_partitions);
 static DEFINE_MUTEX(mtd_partitions_mutex);
 
-/* Our partition node structure */
+/**
+ * struct mtd_part - our partition node structure
+ *
+ * @mtd: struct holding partition details
+ * @parent: parent mtd - flash device or another partition
+ * @offset: partition offset relative to the *flash device*
+ */
 struct mtd_part {
 	struct mtd_info mtd;
-	struct mtd_info *master;
+	struct mtd_info *parent;
 	uint64_t offset;
 	struct list_head list;
 };
@@ -67,15 +73,15 @@ static int part_read(struct mtd_info *mtd, loff_t from, size_t len,
 	struct mtd_ecc_stats stats;
 	int res;
 
-	stats = part->master->ecc_stats;
-	res = part->master->_read(part->master, from + part->offset, len,
+	stats = part->parent->ecc_stats;
+	res = part->parent->_read(part->parent, from + part->offset, len,
 				  retlen, buf);
 	if (unlikely(mtd_is_eccerr(res)))
 		mtd->ecc_stats.failed +=
-			part->master->ecc_stats.failed - stats.failed;
+			part->parent->ecc_stats.failed - stats.failed;
 	else
 		mtd->ecc_stats.corrected +=
-			part->master->ecc_stats.corrected - stats.corrected;
+			part->parent->ecc_stats.corrected - stats.corrected;
 	return res;
 }
 
@@ -84,7 +90,7 @@ static int part_point(struct mtd_info *mtd, loff_t from, size_t len,
 {
 	struct mtd_part *part = mtd_to_part(mtd);
 
-	return part->master->_point(part->master, from + part->offset, len,
+	return part->parent->_point(part->parent, from + part->offset, len,
 				    retlen, virt, phys);
 }
 
@@ -92,7 +98,7 @@ static int part_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
 
-	return part->master->_unpoint(part->master, from + part->offset, len);
+	return part->parent->_unpoint(part->parent, from + part->offset, len);
 }
 
 static unsigned long part_get_unmapped_area(struct mtd_info *mtd,
@@ -103,7 +109,7 @@ static unsigned long part_get_unmapped_area(struct mtd_info *mtd,
 	struct mtd_part *part = mtd_to_part(mtd);
 
 	offset += part->offset;
-	return part->master->_get_unmapped_area(part->master, len, offset,
+	return part->parent->_get_unmapped_area(part->parent, len, offset,
 						flags);
 }
 
@@ -132,7 +138,7 @@ static int part_read_oob(struct mtd_info *mtd, loff_t from,
 			return -EINVAL;
 	}
 
-	res = part->master->_read_oob(part->master, from + part->offset, ops);
+	res = part->parent->_read_oob(part->parent, from + part->offset, ops);
 	if (unlikely(res)) {
 		if (mtd_is_bitflip(res))
 			mtd->ecc_stats.corrected++;
@@ -146,7 +152,7 @@ static int part_read_user_prot_reg(struct mtd_info *mtd, loff_t from,
 		size_t len, size_t *retlen, u_char *buf)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_read_user_prot_reg(part->master, from, len,
+	return part->parent->_read_user_prot_reg(part->parent, from, len,
 						 retlen, buf);
 }
 
@@ -154,7 +160,7 @@ static int part_get_user_prot_info(struct mtd_info *mtd, size_t len,
 				   size_t *retlen, struct otp_info *buf)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_get_user_prot_info(part->master, len, retlen,
+	return part->parent->_get_user_prot_info(part->parent, len, retlen,
 						 buf);
 }
 
@@ -162,7 +168,7 @@ static int part_read_fact_prot_reg(struct mtd_info *mtd, loff_t from,
 		size_t len, size_t *retlen, u_char *buf)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_read_fact_prot_reg(part->master, from, len,
+	return part->parent->_read_fact_prot_reg(part->parent, from, len,
 						 retlen, buf);
 }
 
@@ -170,7 +176,7 @@ static int part_get_fact_prot_info(struct mtd_info *mtd, size_t len,
 				   size_t *retlen, struct otp_info *buf)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_get_fact_prot_info(part->master, len, retlen,
+	return part->parent->_get_fact_prot_info(part->parent, len, retlen,
 						 buf);
 }
 
@@ -178,7 +184,7 @@ static int part_write(struct mtd_info *mtd, loff_t to, size_t len,
 		size_t *retlen, const u_char *buf)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_write(part->master, to + part->offset, len,
+	return part->parent->_write(part->parent, to + part->offset, len,
 				    retlen, buf);
 }
 
@@ -186,7 +192,7 @@ static int part_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
 		size_t *retlen, const u_char *buf)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_panic_write(part->master, to + part->offset, len,
+	return part->parent->_panic_write(part->parent, to + part->offset, len,
 					  retlen, buf);
 }
 
@@ -199,14 +205,14 @@ static int part_write_oob(struct mtd_info *mtd, loff_t to,
 		return -EINVAL;
 	if (ops->datbuf && to + ops->len > mtd->size)
 		return -EINVAL;
-	return part->master->_write_oob(part->master, to + part->offset, ops);
+	return part->parent->_write_oob(part->parent, to + part->offset, ops);
 }
 
 static int part_write_user_prot_reg(struct mtd_info *mtd, loff_t from,
 		size_t len, size_t *retlen, u_char *buf)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_write_user_prot_reg(part->master, from, len,
+	return part->parent->_write_user_prot_reg(part->parent, from, len,
 						  retlen, buf);
 }
 
@@ -214,14 +220,14 @@ static int part_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 		size_t len)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_lock_user_prot_reg(part->master, from, len);
+	return part->parent->_lock_user_prot_reg(part->parent, from, len);
 }
 
 static int part_writev(struct mtd_info *mtd, const struct kvec *vecs,
 		unsigned long count, loff_t to, size_t *retlen)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_writev(part->master, vecs, count,
+	return part->parent->_writev(part->parent, vecs, count,
 				     to + part->offset, retlen);
 }
 
@@ -231,7 +237,7 @@ static int part_erase(struct mtd_info *mtd, struct erase_info *instr)
 	int ret;
 
 	instr->addr += part->offset;
-	ret = part->master->_erase(part->master, instr);
+	ret = part->parent->_erase(part->parent, instr);
 	if (ret) {
 		if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
 			instr->fail_addr -= part->offset;
@@ -257,51 +263,51 @@ EXPORT_SYMBOL_GPL(mtd_erase_callback);
 static int part_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_lock(part->master, ofs + part->offset, len);
+	return part->parent->_lock(part->parent, ofs + part->offset, len);
 }
 
 static int part_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_unlock(part->master, ofs + part->offset, len);
+	return part->parent->_unlock(part->parent, ofs + part->offset, len);
 }
 
 static int part_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_is_locked(part->master, ofs + part->offset, len);
+	return part->parent->_is_locked(part->parent, ofs + part->offset, len);
 }
 
 static void part_sync(struct mtd_info *mtd)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	part->master->_sync(part->master);
+	part->parent->_sync(part->parent);
 }
 
 static int part_suspend(struct mtd_info *mtd)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_suspend(part->master);
+	return part->parent->_suspend(part->parent);
 }
 
 static void part_resume(struct mtd_info *mtd)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	part->master->_resume(part->master);
+	part->parent->_resume(part->parent);
 }
 
 static int part_block_isreserved(struct mtd_info *mtd, loff_t ofs)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
 	ofs += part->offset;
-	return part->master->_block_isreserved(part->master, ofs);
+	return part->parent->_block_isreserved(part->parent, ofs);
 }
 
 static int part_block_isbad(struct mtd_info *mtd, loff_t ofs)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
 	ofs += part->offset;
-	return part->master->_block_isbad(part->master, ofs);
+	return part->parent->_block_isbad(part->parent, ofs);
 }
 
 static int part_block_markbad(struct mtd_info *mtd, loff_t ofs)
@@ -310,7 +316,7 @@ static int part_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	int res;
 
 	ofs += part->offset;
-	res = part->master->_block_markbad(part->master, ofs);
+	res = part->parent->_block_markbad(part->parent, ofs);
 	if (!res)
 		mtd->ecc_stats.badblocks++;
 	return res;
@@ -319,13 +325,13 @@ static int part_block_markbad(struct mtd_info *mtd, loff_t ofs)
 static int part_get_device(struct mtd_info *mtd)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	return part->master->_get_device(part->master);
+	return part->parent->_get_device(part->parent);
 }
 
 static void part_put_device(struct mtd_info *mtd)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
-	part->master->_put_device(part->master);
+	part->parent->_put_device(part->parent);
 }
 
 static int part_ooblayout_ecc(struct mtd_info *mtd, int section,
@@ -333,7 +339,7 @@ static int part_ooblayout_ecc(struct mtd_info *mtd, int section,
 {
 	struct mtd_part *part = mtd_to_part(mtd);
 
-	return mtd_ooblayout_ecc(part->master, section, oobregion);
+	return mtd_ooblayout_ecc(part->parent, section, oobregion);
 }
 
 static int part_ooblayout_free(struct mtd_info *mtd, int section,
@@ -341,7 +347,7 @@ static int part_ooblayout_free(struct mtd_info *mtd, int section,
 {
 	struct mtd_part *part = mtd_to_part(mtd);
 
-	return mtd_ooblayout_free(part->master, section, oobregion);
+	return mtd_ooblayout_free(part->parent, section, oobregion);
 }
 
 static const struct mtd_ooblayout_ops part_ooblayout_ops = {
@@ -353,7 +359,7 @@ static int part_max_bad_blocks(struct mtd_info *mtd, loff_t ofs, size_t len)
 {
 	struct mtd_part *part = mtd_to_part(mtd);
 
-	return part->master->_max_bad_blocks(part->master,
+	return part->parent->_max_bad_blocks(part->parent,
 					     ofs + part->offset, len);
 }
 
@@ -363,63 +369,70 @@ static inline void free_partition(struct mtd_part *p)
 	kfree(p);
 }
 
-/*
- * This function unregisters and destroy all slave MTD objects which are
- * attached to the given master MTD object.
+/**
+ * mtd_parse_part - parse MTD partition looking for subpartitions
+ *
+ * @slave: part that is supposed to be a container and should be parsed
+ * @types: NULL-terminated array with names of partition parsers to try
+ *
+ * Some partitions are kind of containers with extra subpartitions (volumes).
+ * There can be various formats of such containers. This function tries to use
+ * specified parsers to analyze given partition and registers found
+ * subpartitions on success.
  */
-
-int del_mtd_partitions(struct mtd_info *master)
+static int mtd_parse_part(struct mtd_part *slave, const char *const *types)
 {
-	struct mtd_part *slave, *next;
-	int ret, err = 0;
+	struct mtd_partitions parsed;
+	int err;
 
-	mutex_lock(&mtd_partitions_mutex);
-	list_for_each_entry_safe(slave, next, &mtd_partitions, list)
-		if (slave->master == master) {
-			ret = del_mtd_device(&slave->mtd);
-			if (ret < 0) {
-				err = ret;
-				continue;
-			}
-			list_del(&slave->list);
-			free_partition(slave);
-		}
-	mutex_unlock(&mtd_partitions_mutex);
+	err = parse_mtd_partitions(&slave->mtd, types, &parsed, NULL);
+	if (err)
+		return err;
+	else if (!parsed.nr_parts)
+		return -ENOENT;
+
+	err = add_mtd_partitions(&slave->mtd, parsed.parts, parsed.nr_parts);
+
+	mtd_part_parser_cleanup(&parsed);
 
 	return err;
 }
 
-static struct mtd_part *allocate_partition(struct mtd_info *master,
+static struct mtd_part *allocate_partition(struct mtd_info *parent,
 			const struct mtd_partition *part, int partno,
 			uint64_t cur_offset)
 {
+	int wr_alignment = (parent->flags & MTD_NO_ERASE) ? parent->writesize :
+							    parent->erasesize;
 	struct mtd_part *slave;
+	u32 remainder;
 	char *name;
+	u64 tmp;
 
 	/* allocate the partition structure */
 	slave = kzalloc(sizeof(*slave), GFP_KERNEL);
 	name = kstrdup(part->name, GFP_KERNEL);
 	if (!name || !slave) {
 		printk(KERN_ERR"memory allocation error while creating partitions for \"%s\"\n",
-		       master->name);
+		       parent->name);
 		kfree(name);
 		kfree(slave);
 		return ERR_PTR(-ENOMEM);
 	}
 
 	/* set up the MTD object for this partition */
-	slave->mtd.type = master->type;
-	slave->mtd.flags = master->flags & ~part->mask_flags;
+	slave->mtd.type = parent->type;
+	slave->mtd.flags = parent->flags & ~part->mask_flags;
 	slave->mtd.size = part->size;
-	slave->mtd.writesize = master->writesize;
-	slave->mtd.writebufsize = master->writebufsize;
-	slave->mtd.oobsize = master->oobsize;
-	slave->mtd.oobavail = master->oobavail;
-	slave->mtd.subpage_sft = master->subpage_sft;
-	slave->mtd.pairing = master->pairing;
+	slave->mtd.writesize = parent->writesize;
+	slave->mtd.writebufsize = parent->writebufsize;
+	slave->mtd.oobsize = parent->oobsize;
+	slave->mtd.oobavail = parent->oobavail;
+	slave->mtd.subpage_sft = parent->subpage_sft;
+	slave->mtd.pairing = parent->pairing;
 
 	slave->mtd.name = name;
-	slave->mtd.owner = master->owner;
+	slave->mtd.owner = parent->owner;
 
 	/* NOTE: Historically, we didn't arrange MTDs as a tree out of
 	 * concern for showing the same data in multiple partitions.
@@ -429,80 +442,81 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 	 * parent conditional on that option. Note, this is a way to
 	 * distinguish between the master and the partition in sysfs.
 	 */
-	slave->mtd.dev.parent = IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) ?
-				&master->dev :
-				master->dev.parent;
+	slave->mtd.dev.parent = IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd_is_partition(parent) ?
+				&parent->dev :
+				parent->dev.parent;
 	slave->mtd.dev.of_node = part->of_node;
 
 	slave->mtd._read = part_read;
 	slave->mtd._write = part_write;
 
-	if (master->_panic_write)
+	if (parent->_panic_write)
 		slave->mtd._panic_write = part_panic_write;
 
-	if (master->_point && master->_unpoint) {
+	if (parent->_point && parent->_unpoint) {
 		slave->mtd._point = part_point;
 		slave->mtd._unpoint = part_unpoint;
 	}
 
-	if (master->_get_unmapped_area)
+	if (parent->_get_unmapped_area)
 		slave->mtd._get_unmapped_area = part_get_unmapped_area;
-	if (master->_read_oob)
+	if (parent->_read_oob)
 		slave->mtd._read_oob = part_read_oob;
-	if (master->_write_oob)
+	if (parent->_write_oob)
 		slave->mtd._write_oob = part_write_oob;
-	if (master->_read_user_prot_reg)
+	if (parent->_read_user_prot_reg)
 		slave->mtd._read_user_prot_reg = part_read_user_prot_reg;
-	if (master->_read_fact_prot_reg)
+	if (parent->_read_fact_prot_reg)
 		slave->mtd._read_fact_prot_reg = part_read_fact_prot_reg;
-	if (master->_write_user_prot_reg)
+	if (parent->_write_user_prot_reg)
 		slave->mtd._write_user_prot_reg = part_write_user_prot_reg;
-	if (master->_lock_user_prot_reg)
+	if (parent->_lock_user_prot_reg)
 		slave->mtd._lock_user_prot_reg = part_lock_user_prot_reg;
-	if (master->_get_user_prot_info)
+	if (parent->_get_user_prot_info)
 		slave->mtd._get_user_prot_info = part_get_user_prot_info;
-	if (master->_get_fact_prot_info)
+	if (parent->_get_fact_prot_info)
 		slave->mtd._get_fact_prot_info = part_get_fact_prot_info;
-	if (master->_sync)
+	if (parent->_sync)
 		slave->mtd._sync = part_sync;
-	if (!partno && !master->dev.class && master->_suspend &&
-	    master->_resume) {
-			slave->mtd._suspend = part_suspend;
-			slave->mtd._resume = part_resume;
+	if (!partno && !parent->dev.class && parent->_suspend &&
+	    parent->_resume) {
+		slave->mtd._suspend = part_suspend;
+		slave->mtd._resume = part_resume;
 	}
-	if (master->_writev)
+	if (parent->_writev)
 		slave->mtd._writev = part_writev;
-	if (master->_lock)
+	if (parent->_lock)
 		slave->mtd._lock = part_lock;
-	if (master->_unlock)
+	if (parent->_unlock)
 		slave->mtd._unlock = part_unlock;
-	if (master->_is_locked)
+	if (parent->_is_locked)
 		slave->mtd._is_locked = part_is_locked;
-	if (master->_block_isreserved)
+	if (parent->_block_isreserved)
 		slave->mtd._block_isreserved = part_block_isreserved;
-	if (master->_block_isbad)
+	if (parent->_block_isbad)
 		slave->mtd._block_isbad = part_block_isbad;
-	if (master->_block_markbad)
+	if (parent->_block_markbad)
 		slave->mtd._block_markbad = part_block_markbad;
-	if (master->_max_bad_blocks)
+	if (parent->_max_bad_blocks)
 		slave->mtd._max_bad_blocks = part_max_bad_blocks;
 
-	if (master->_get_device)
+	if (parent->_get_device)
 		slave->mtd._get_device = part_get_device;
-	if (master->_put_device)
+	if (parent->_put_device)
 		slave->mtd._put_device = part_put_device;
 
 	slave->mtd._erase = part_erase;
-	slave->master = master;
+	slave->parent = parent;
 	slave->offset = part->offset;
 
 	if (slave->offset == MTDPART_OFS_APPEND)
 		slave->offset = cur_offset;
 	if (slave->offset == MTDPART_OFS_NXTBLK) {
+		tmp = cur_offset;
 		slave->offset = cur_offset;
-		if (mtd_mod_by_eb(cur_offset, master) != 0) {
-			/* Round up to next erasesize */
-			slave->offset = (mtd_div_by_eb(cur_offset, master) + 1) * master->erasesize;
+		remainder = do_div(tmp, wr_alignment);
+		if (remainder) {
+			slave->offset += wr_alignment - remainder;
 			printk(KERN_NOTICE "Moving partition %d: "
 			       "0x%012llx -> 0x%012llx\n", partno,
 			       (unsigned long long)cur_offset, (unsigned long long)slave->offset);
@@ -510,25 +524,25 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 	}
 	if (slave->offset == MTDPART_OFS_RETAIN) {
 		slave->offset = cur_offset;
-		if (master->size - slave->offset >= slave->mtd.size) {
-			slave->mtd.size = master->size - slave->offset
+		if (parent->size - slave->offset >= slave->mtd.size) {
+			slave->mtd.size = parent->size - slave->offset
 							- slave->mtd.size;
 		} else {
 			printk(KERN_ERR "mtd partition \"%s\" doesn't have enough space: %#llx < %#llx, disabled\n",
-				part->name, master->size - slave->offset,
+				part->name, parent->size - slave->offset,
 				slave->mtd.size);
 			/* register to preserve ordering */
 			goto out_register;
 		}
 	}
 	if (slave->mtd.size == MTDPART_SIZ_FULL)
-		slave->mtd.size = master->size - slave->offset;
+		slave->mtd.size = parent->size - slave->offset;
 
 	printk(KERN_NOTICE "0x%012llx-0x%012llx : \"%s\"\n", (unsigned long long)slave->offset,
 		(unsigned long long)(slave->offset + slave->mtd.size), slave->mtd.name);
 
 	/* let's do some sanity checks */
-	if (slave->offset >= master->size) {
+	if (slave->offset >= parent->size) {
 		/* let's register it anyway to preserve ordering */
 		slave->offset = 0;
 		slave->mtd.size = 0;
@@ -536,16 +550,16 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 			part->name);
 		goto out_register;
 	}
-	if (slave->offset + slave->mtd.size > master->size) {
-		slave->mtd.size = master->size - slave->offset;
+	if (slave->offset + slave->mtd.size > parent->size) {
+		slave->mtd.size = parent->size - slave->offset;
 		printk(KERN_WARNING"mtd: partition \"%s\" extends beyond the end of device \"%s\" -- size truncated to %#llx\n",
-			part->name, master->name, (unsigned long long)slave->mtd.size);
+			part->name, parent->name, (unsigned long long)slave->mtd.size);
 	}
-	if (master->numeraseregions > 1) {
+	if (parent->numeraseregions > 1) {
 		/* Deal with variable erase size stuff */
-		int i, max = master->numeraseregions;
+		int i, max = parent->numeraseregions;
 		u64 end = slave->offset + slave->mtd.size;
-		struct mtd_erase_region_info *regions = master->eraseregions;
+		struct mtd_erase_region_info *regions = parent->eraseregions;
 
 		/* Find the first erase regions which is part of this
 		 * partition. */
@@ -564,37 +578,40 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 		BUG_ON(slave->mtd.erasesize == 0);
 	} else {
 		/* Single erase size */
-		slave->mtd.erasesize = master->erasesize;
+		slave->mtd.erasesize = parent->erasesize;
 	}
 
-	if ((slave->mtd.flags & MTD_WRITEABLE) &&
-	    mtd_mod_by_eb(slave->offset, &slave->mtd)) {
+	tmp = slave->offset;
+	remainder = do_div(tmp, wr_alignment);
+	if ((slave->mtd.flags & MTD_WRITEABLE) && remainder) {
 		/* Doesn't start on a boundary of major erase size */
 		/* FIXME: Let it be writable if it is on a boundary of
 		 * _minor_ erase size though */
 		slave->mtd.flags &= ~MTD_WRITEABLE;
-		printk(KERN_WARNING"mtd: partition \"%s\" doesn't start on an erase block boundary -- force read-only\n",
+		printk(KERN_WARNING"mtd: partition \"%s\" doesn't start on an erase/write block boundary -- force read-only\n",
 			part->name);
 	}
-	if ((slave->mtd.flags & MTD_WRITEABLE) &&
-	    mtd_mod_by_eb(slave->mtd.size, &slave->mtd)) {
+
+	tmp = slave->mtd.size;
+	remainder = do_div(tmp, wr_alignment);
+	if ((slave->mtd.flags & MTD_WRITEABLE) && remainder) {
 		slave->mtd.flags &= ~MTD_WRITEABLE;
-		printk(KERN_WARNING"mtd: partition \"%s\" doesn't end on an erase block -- force read-only\n",
+		printk(KERN_WARNING"mtd: partition \"%s\" doesn't end on an erase/write block -- force read-only\n",
 			part->name);
 	}
 
 	mtd_set_ooblayout(&slave->mtd, &part_ooblayout_ops);
-	slave->mtd.ecc_step_size = master->ecc_step_size;
-	slave->mtd.ecc_strength = master->ecc_strength;
-	slave->mtd.bitflip_threshold = master->bitflip_threshold;
+	slave->mtd.ecc_step_size = parent->ecc_step_size;
+	slave->mtd.ecc_strength = parent->ecc_strength;
+	slave->mtd.bitflip_threshold = parent->bitflip_threshold;
 
-	if (master->_block_isbad) {
+	if (parent->_block_isbad) {
 		uint64_t offs = 0;
 
 		while (offs < slave->mtd.size) {
-			if (mtd_block_isreserved(master, offs + slave->offset))
+			if (mtd_block_isreserved(parent, offs + slave->offset))
 				slave->mtd.ecc_stats.bbtblocks++;
-			else if (mtd_block_isbad(master, offs + slave->offset))
+			else if (mtd_block_isbad(parent, offs + slave->offset))
 				slave->mtd.ecc_stats.badblocks++;
 			offs += slave->mtd.erasesize;
 		}
@@ -628,7 +645,7 @@ static int mtd_add_partition_attrs(struct mtd_part *new)
 	return ret;
 }
 
-int mtd_add_partition(struct mtd_info *master, const char *name,
+int mtd_add_partition(struct mtd_info *parent, const char *name,
 		      long long offset, long long length)
 {
 	struct mtd_partition part;
@@ -641,7 +658,7 @@ int mtd_add_partition(struct mtd_info *master, const char *name,
 		return -EINVAL;
 
 	if (length == MTDPART_SIZ_FULL)
-		length = master->size - offset;
+		length = parent->size - offset;
 
 	if (length <= 0)
 		return -EINVAL;
@@ -651,7 +668,7 @@ int mtd_add_partition(struct mtd_info *master, const char *name,
 	part.size = length;
 	part.offset = offset;
 
-	new = allocate_partition(master, &part, -1, offset);
+	new = allocate_partition(parent, &part, -1, offset);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
 
@@ -667,23 +684,69 @@ int mtd_add_partition(struct mtd_info *master, const char *name,
 }
 EXPORT_SYMBOL_GPL(mtd_add_partition);
 
-int mtd_del_partition(struct mtd_info *master, int partno)
+/**
+ * __mtd_del_partition - delete MTD partition
+ *
+ * @priv: internal MTD struct for partition to be deleted
+ *
+ * This function must be called with the partitions mutex locked.
+ */
+static int __mtd_del_partition(struct mtd_part *priv)
+{
+	struct mtd_part *child, *next;
+	int err;
+
+	list_for_each_entry_safe(child, next, &mtd_partitions, list) {
+		if (child->parent == &priv->mtd) {
+			err = __mtd_del_partition(child);
+			if (err)
+				return err;
+		}
+	}
+
+	sysfs_remove_files(&priv->mtd.dev.kobj, mtd_partition_attrs);
+
+	err = del_mtd_device(&priv->mtd);
+	if (err)
+		return err;
+
+	list_del(&priv->list);
+	free_partition(priv);
+
+	return 0;
+}
+
+/*
+ * This function unregisters and destroys all slave MTD objects which are
+ * attached to the given MTD object.
+ */
+int del_mtd_partitions(struct mtd_info *mtd)
+{
+	struct mtd_part *slave, *next;
+	int ret, err = 0;
+
+	mutex_lock(&mtd_partitions_mutex);
+	list_for_each_entry_safe(slave, next, &mtd_partitions, list)
+		if (slave->parent == mtd) {
+			ret = __mtd_del_partition(slave);
+			if (ret < 0)
+				err = ret;
+		}
+	mutex_unlock(&mtd_partitions_mutex);
+
+	return err;
+}
+
+int mtd_del_partition(struct mtd_info *mtd, int partno)
 {
 	struct mtd_part *slave, *next;
 	int ret = -EINVAL;
 
 	mutex_lock(&mtd_partitions_mutex);
 	list_for_each_entry_safe(slave, next, &mtd_partitions, list)
-		if ((slave->master == master) &&
+		if ((slave->parent == mtd) &&
 		    (slave->mtd.index == partno)) {
-			sysfs_remove_files(&slave->mtd.dev.kobj,
-					   mtd_partition_attrs);
-			ret = del_mtd_device(&slave->mtd);
-			if (ret < 0)
-				break;
-
-			list_del(&slave->list);
-			free_partition(slave);
+			ret = __mtd_del_partition(slave);
 			break;
 		}
 	mutex_unlock(&mtd_partitions_mutex);
@@ -724,6 +787,8 @@ int add_mtd_partitions(struct mtd_info *master,
 
 		add_mtd_device(&slave->mtd);
 		mtd_add_partition_attrs(slave);
+		if (parts[i].types)
+			mtd_parse_part(slave, parts[i].types);
 
 		cur_offset = slave->offset + slave->mtd.size;
 	}
@@ -799,6 +864,27 @@ static const char * const default_mtd_part_types[] = {
 	NULL
 };
 
+static int mtd_part_do_parse(struct mtd_part_parser *parser,
+			     struct mtd_info *master,
+			     struct mtd_partitions *pparts,
+			     struct mtd_part_parser_data *data)
+{
+	int ret;
+
+	ret = (*parser->parse_fn)(master, &pparts->parts, data);
+	pr_debug("%s: parser %s: %i\n", master->name, parser->name, ret);
+	if (ret <= 0)
+		return ret;
+
+	pr_notice("%d %s partitions found on MTD device %s\n", ret,
+		  parser->name, master->name);
+
+	pparts->nr_parts = ret;
+	pparts->parser = parser;
+
+	return ret;
+}
+
 /**
  * parse_mtd_partitions - parse MTD partitions
  * @master: the master partition (describes whole MTD device)
@@ -839,16 +925,10 @@ int parse_mtd_partitions(struct mtd_info *master, const char *const *types,
 			 parser ? parser->name : NULL);
 		if (!parser)
 			continue;
-		ret = (*parser->parse_fn)(master, &pparts->parts, data);
-		pr_debug("%s: parser %s: %i\n",
-			 master->name, parser->name, ret);
-		if (ret > 0) {
-			printk(KERN_NOTICE "%d %s partitions found on MTD device %s\n",
-			       ret, parser->name, master->name);
-			pparts->nr_parts = ret;
-			pparts->parser = parser;
+		ret = mtd_part_do_parse(parser, master, pparts, data);
+		/* Found partitions! */
+		if (ret > 0)
 			return 0;
-		}
 		mtd_part_parser_put(parser);
 		/*
 		 * Stash the first error we see; only report it if no parser
@@ -899,6 +979,6 @@ uint64_t mtd_get_device_size(const struct mtd_info *mtd)
 	if (!mtd_is_partition(mtd))
 		return mtd->size;
 
-	return mtd_to_part(mtd)->master->size;
+	return mtd_get_device_size(mtd_to_part(mtd)->parent);
 }
 EXPORT_SYMBOL_GPL(mtd_get_device_size);

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index c302952..dbfa72d 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig

@@ -308,6 +308,7 @@
 config MTD_NAND_ATMEL
 	tristate "Support for NAND Flash / SmartMedia on AT91"
 	depends on ARCH_AT91
+	select MFD_ATMEL_SMC
 	help
 	  Enables support for NAND Flash / Smart Media Card interface
 	  on Atmel AT91 processors.
@@ -542,6 +543,7 @@
 
 config MTD_NAND_HISI504
 	tristate "Support for NAND controller on Hisilicon SoC Hip04"
+	depends on ARCH_HISI || COMPILE_TEST
 	depends on HAS_DMA
 	help
 	  Enables support for NAND controller on Hisilicon SoC Hip04.
@@ -555,6 +557,7 @@
 
 config MTD_NAND_MTK
 	tristate "Support for NAND controller on MTK SoCs"
+	depends on ARCH_MEDIATEK || COMPILE_TEST
 	depends on HAS_DMA
 	help
 	  Enables support for NAND controller on MTK SoCs.

diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c
index 3b24468..d922a88 100644
--- a/drivers/mtd/nand/atmel/nand-controller.c
+++ b/drivers/mtd/nand/atmel/nand-controller.c

@@ -57,6 +57,7 @@
 #include <linux/interrupt.h>
 #include <linux/mfd/syscon.h>
 #include <linux/mfd/syscon/atmel-matrix.h>
+#include <linux/mfd/syscon/atmel-smc.h>
 #include <linux/module.h>
 #include <linux/mtd/nand.h>
 #include <linux/of_address.h>
@@ -64,7 +65,6 @@
 #include <linux/of_platform.h>
 #include <linux/iopoll.h>
 #include <linux/platform_device.h>
-#include <linux/platform_data/atmel.h>
 #include <linux/regmap.h>
 
 #include "pmecc.h"
@@ -151,6 +151,8 @@ struct atmel_nand_cs {
 		void __iomem *virt;
 		dma_addr_t dma;
 	} io;
+
+	struct atmel_smc_cs_conf smcconf;
 };
 
 struct atmel_nand {
@@ -196,6 +198,8 @@ struct atmel_nand_controller_ops {
 	void (*nand_init)(struct atmel_nand_controller *nc,
 			  struct atmel_nand *nand);
 	int (*ecc_init)(struct atmel_nand *nand);
+	int (*setup_data_interface)(struct atmel_nand *nand, int csline,
+				    const struct nand_data_interface *conf);
 };
 
 struct atmel_nand_controller_caps {
@@ -912,7 +916,7 @@ static int atmel_hsmc_nand_pmecc_write_pg(struct nand_chip *chip,
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct atmel_nand *nand = to_atmel_nand(chip);
 	struct atmel_hsmc_nand_controller *nc;
-	int ret;
+	int ret, status;
 
 	nc = to_hsmc_nand_controller(chip->controller);
 
@@ -954,6 +958,10 @@ static int atmel_hsmc_nand_pmecc_write_pg(struct nand_chip *chip,
 		dev_err(nc->base.dev, "Failed to program NAND page (err = %d)\n",
 			ret);
 
+	status = chip->waitfunc(mtd, chip);
+	if (status & NAND_STATUS_FAIL)
+		return -EIO;
+
 	return ret;
 }
 
@@ -1175,6 +1183,295 @@ static int atmel_hsmc_nand_ecc_init(struct atmel_nand *nand)
 	return 0;
 }
 
+static int atmel_smc_nand_prepare_smcconf(struct atmel_nand *nand,
+					const struct nand_data_interface *conf,
+					struct atmel_smc_cs_conf *smcconf)
+{
+	u32 ncycles, totalcycles, timeps, mckperiodps;
+	struct atmel_nand_controller *nc;
+	int ret;
+
+	nc = to_nand_controller(nand->base.controller);
+
+	/* Only SDR timings can be converted: DDR interface not supported. */
+	if (conf->type != NAND_SDR_IFACE)
+		return -ENOTSUPP;
+
+	/*
+	 * tRC < 30ns implies EDO mode, which this controller does not support.
+	 * SDR timings are expressed in picoseconds, hence the 30000 bound.
+	 */
+	if (conf->timings.sdr.tRC_min < 30000)
+		return -ENOTSUPP;
+
+	atmel_smc_cs_conf_init(smcconf);
+
+	mckperiodps = NSEC_PER_SEC / clk_get_rate(nc->mck);
+	mckperiodps *= 1000;
+
+	/*
+	 * Set write pulse timing. This one is easy to extract:
+	 *
+	 * NWE_PULSE = tWP
+	 */
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tWP_min, mckperiodps);
+	totalcycles = ncycles;
+	ret = atmel_smc_cs_conf_set_pulse(smcconf, ATMEL_SMC_NWE_SHIFT,
+					  ncycles);
+	if (ret)
+		return ret;
+
+	/*
+	 * The write setup timing depends on the operation done on the NAND.
+	 * All operations go through the same data bus, but the operation
+	 * type depends on the address we are writing to (ALE/CLE address
+	 * lines).
+	 * Since we have no way to differentiate the different operations at
+	 * the SMC level, we must consider the worst case (the biggest setup
+	 * time among all operation types):
+	 *
+	 * NWE_SETUP = max(tCLS, tCS, tALS, tDS) - NWE_PULSE
+	 */
+	timeps = max3(conf->timings.sdr.tCLS_min, conf->timings.sdr.tCS_min,
+		      conf->timings.sdr.tALS_min);
+	timeps = max(timeps, conf->timings.sdr.tDS_min);
+	ncycles = DIV_ROUND_UP(timeps, mckperiodps);
+	ncycles = ncycles > totalcycles ? ncycles - totalcycles : 0;
+	totalcycles += ncycles;
+	ret = atmel_smc_cs_conf_set_setup(smcconf, ATMEL_SMC_NWE_SHIFT,
+					  ncycles);
+	if (ret)
+		return ret;
+
+	/*
+	 * As for the write setup timing, the write hold timing depends on the
+	 * operation done on the NAND:
+	 *
+	 * NWE_HOLD = max(tCLH, tCH, tALH, tDH, tWH)
+	 */
+	timeps = max3(conf->timings.sdr.tCLH_min, conf->timings.sdr.tCH_min,
+		      conf->timings.sdr.tALH_min);
+	timeps = max3(timeps, conf->timings.sdr.tDH_min,
+		      conf->timings.sdr.tWH_min);
+	ncycles = DIV_ROUND_UP(timeps, mckperiodps);
+	totalcycles += ncycles;
+
+	/*
+	 * The write cycle timing is directly matching tWC, but is also
+	 * dependent on the setup, pulse and hold timings we calculated
+	 * earlier, which gives:
+	 *
+	 * NWE_CYCLE = max(tWC, NWE_SETUP + NWE_PULSE + NWE_HOLD)
+	 */
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tWC_min, mckperiodps);
+	ncycles = max(totalcycles, ncycles);
+	ret = atmel_smc_cs_conf_set_cycle(smcconf, ATMEL_SMC_NWE_SHIFT,
+					  ncycles);
+	if (ret)
+		return ret;
+
+	/*
+	 * We don't want the CS line to be toggled between each byte/word
+	 * transfer to the NAND. The only way to guarantee that is to have the
+	 * NCS_{WR,RD}_{SETUP,HOLD} timings set to 0, which in turn means:
+	 *
+	 * NCS_WR_PULSE = NWE_CYCLE
+	 */
+	ret = atmel_smc_cs_conf_set_pulse(smcconf, ATMEL_SMC_NCS_WR_SHIFT,
+					  ncycles);
+	if (ret)
+		return ret;
+
+	/*
+	 * As for the write hold timing, the read hold timing depends on the
+	 * operation done on the NAND:
+	 *
+	 * NRD_HOLD = max(tREH, tRHOH)
+	 */
+	timeps = max(conf->timings.sdr.tREH_min, conf->timings.sdr.tRHOH_min);
+	ncycles = DIV_ROUND_UP(timeps, mckperiodps);
+	totalcycles = ncycles;
+
+	/*
+	 * TDF = tRHZ - NRD_HOLD (floored at 0: ncycles is unsigned)
+	 */
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tRHZ_max, mckperiodps);
+	ncycles = ncycles > totalcycles ? ncycles - totalcycles : 0;
+
+	/*
+	 * In ONFI 4.0 specs, tRHZ has been increased to support EDO NANDs and
+	 * we might end up with a config that does not fit in the TDF field.
+	 * Just take the max value in this case and hope that the NAND is more
+	 * tolerant than advertised.
+	 */
+	if (ncycles > ATMEL_SMC_MODE_TDF_MAX)
+		ncycles = ATMEL_SMC_MODE_TDF_MAX;
+	else if (ncycles < ATMEL_SMC_MODE_TDF_MIN)
+		ncycles = ATMEL_SMC_MODE_TDF_MIN;
+
+	smcconf->mode |= ATMEL_SMC_MODE_TDF(ncycles) |
+			 ATMEL_SMC_MODE_TDFMODE_OPTIMIZED;
+
+	/*
+	 * Read pulse timing directly matches tRP:
+	 *
+	 * NRD_PULSE = tRP
+	 */
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tRP_min, mckperiodps);
+	totalcycles += ncycles;
+	ret = atmel_smc_cs_conf_set_pulse(smcconf, ATMEL_SMC_NRD_SHIFT,
+					  ncycles);
+	if (ret)
+		return ret;
+
+	/*
+	 * The read cycle timing is directly matching tRC, but is also
+	 * dependent on the pulse and hold timings we calculated earlier,
+	 * which gives:
+	 *
+	 * NRD_CYCLE = max(tRC, NRD_PULSE + NRD_HOLD)
+	 *
+	 * NRD_SETUP is always 0.
+	 */
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tRC_min, mckperiodps);
+	ncycles = max(totalcycles, ncycles);
+	ret = atmel_smc_cs_conf_set_cycle(smcconf, ATMEL_SMC_NRD_SHIFT,
+					  ncycles);
+	if (ret)
+		return ret;
+
+	/*
+	 * We don't want the CS line to be toggled between each byte/word
+	 * transfer from the NAND. The only way to guarantee that is to have
+	 * the NCS_{WR,RD}_{SETUP,HOLD} timings set to 0, which in turn means:
+	 *
+	 * NCS_RD_PULSE = NRD_CYCLE
+	 */
+	ret = atmel_smc_cs_conf_set_pulse(smcconf, ATMEL_SMC_NCS_RD_SHIFT,
+					  ncycles);
+	if (ret)
+		return ret;
+
+	/* Txxx timings are directly matching tXXX ones. */
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tCLR_min, mckperiodps);
+	ret = atmel_smc_cs_conf_set_timing(smcconf,
+					   ATMEL_HSMC_TIMINGS_TCLR_SHIFT,
+					   ncycles);
+	if (ret)
+		return ret;
+
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tADL_min, mckperiodps);
+	ret = atmel_smc_cs_conf_set_timing(smcconf,
+					   ATMEL_HSMC_TIMINGS_TADL_SHIFT,
+					   ncycles);
+	if (ret)
+		return ret;
+
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tAR_min, mckperiodps);
+	ret = atmel_smc_cs_conf_set_timing(smcconf,
+					   ATMEL_HSMC_TIMINGS_TAR_SHIFT,
+					   ncycles);
+	if (ret)
+		return ret;
+
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tRR_min, mckperiodps);
+	ret = atmel_smc_cs_conf_set_timing(smcconf,
+					   ATMEL_HSMC_TIMINGS_TRR_SHIFT,
+					   ncycles);
+	if (ret)
+		return ret;
+
+	ncycles = DIV_ROUND_UP(conf->timings.sdr.tWB_max, mckperiodps);
+	ret = atmel_smc_cs_conf_set_timing(smcconf,
+					   ATMEL_HSMC_TIMINGS_TWB_SHIFT,
+					   ncycles);
+	if (ret)
+		return ret;
+
+	/* Attach the CS line to the NFC logic. */
+	smcconf->timings |= ATMEL_HSMC_TIMINGS_NFSEL;
+
+	/* Set the appropriate data bus width. */
+	if (nand->base.options & NAND_BUSWIDTH_16)
+		smcconf->mode |= ATMEL_SMC_MODE_DBW_16;
+
+	/* Operate in NRD/NWE READ/WRITEMODE. */
+	smcconf->mode |= ATMEL_SMC_MODE_READMODE_NRD |
+			 ATMEL_SMC_MODE_WRITEMODE_NWE;
+
+	return 0;
+}
+
+static int atmel_smc_nand_setup_data_interface(struct atmel_nand *nand,
+					int csline,
+					const struct nand_data_interface *conf)
+{
+	struct atmel_nand_controller *nc;
+	struct atmel_smc_cs_conf smcconf;
+	struct atmel_nand_cs *cs;
+	int ret;
+
+	nc = to_nand_controller(nand->base.controller);
+	/* Build the SMC config for @conf; fail if the timings are rejected. */
+	ret = atmel_smc_nand_prepare_smcconf(nand, conf, &smcconf);
+	if (ret)
+		return ret;
+	/* Check-only request: the timings were accepted, apply nothing. */
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
+		return 0;
+	/* Store the config in the CS entry, then hand it to the SMC helper. */
+	cs = &nand->cs[csline];
+	cs->smcconf = smcconf;
+	atmel_smc_cs_conf_apply(nc->smc, cs->id, &cs->smcconf);
+
+	return 0;
+}
+
+static int atmel_hsmc_nand_setup_data_interface(struct atmel_nand *nand,
+					int csline,
+					const struct nand_data_interface *conf)
+{
+	struct atmel_nand_controller *nc;
+	struct atmel_smc_cs_conf smcconf;
+	struct atmel_nand_cs *cs;
+	int ret;
+
+	nc = to_nand_controller(nand->base.controller);
+	/* Build the SMC config for @conf; fail if the timings are rejected. */
+	ret = atmel_smc_nand_prepare_smcconf(nand, conf, &smcconf);
+	if (ret)
+		return ret;
+	/* Check-only request: the timings were accepted, apply nothing. */
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
+		return 0;
+	/* Store the config in the CS entry before adjusting and applying it. */
+	cs = &nand->cs[csline];
+	cs->smcconf = smcconf;
+	/* With a native R/B line, encode its id in the timings via RBNSEL(). */
+	if (cs->rb.type == ATMEL_NAND_NATIVE_RB)
+		cs->smcconf.timings |= ATMEL_HSMC_TIMINGS_RBNSEL(cs->rb.id);
+
+	atmel_hsmc_cs_conf_apply(nc->smc, cs->id, &cs->smcconf);
+
+	return 0;
+}
+
+static int atmel_nand_setup_data_interface(struct mtd_info *mtd, int csline,
+					const struct nand_data_interface *conf)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct atmel_nand *nand = to_atmel_nand(chip);
+	struct atmel_nand_controller *nc;
+
+	nc = to_nand_controller(nand->base.controller);
+	/* Accept a CS index below numcs or NAND_DATA_IFACE_CHECK_ONLY only. */
+	if (csline >= nand->numcs ||
+	    (csline < 0 && csline != NAND_DATA_IFACE_CHECK_ONLY))
+		return -EINVAL;
+	/* Delegate to the controller-specific ->setup_data_interface() hook. */
+	return nc->caps->ops->setup_data_interface(nand, csline, conf);
+}
+
 static void atmel_nand_init(struct atmel_nand_controller *nc,
 			    struct atmel_nand *nand)
 {
@@ -1192,6 +1489,9 @@ static void atmel_nand_init(struct atmel_nand_controller *nc,
 	chip->write_buf = atmel_nand_write_buf;
 	chip->select_chip = atmel_nand_select_chip;
 
+	if (nc->mck && nc->caps->ops->setup_data_interface)
+		chip->setup_data_interface = atmel_nand_setup_data_interface;
+
 	/* Some NANDs require a longer delay than the default one (20us). */
 	chip->chip_delay = 40;
 
@@ -1677,6 +1977,12 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc,
 	if (nc->caps->legacy_of_bindings)
 		return 0;
 
+	nc->mck = of_clk_get(dev->parent->of_node, 0);
+	if (IS_ERR(nc->mck)) {
+		dev_err(dev, "Failed to retrieve MCK clk\n");
+		return PTR_ERR(nc->mck);
+	}
+
 	np = of_parse_phandle(dev->parent->of_node, "atmel,smc", 0);
 	if (!np) {
 		dev_err(dev, "Missing or invalid atmel,smc property\n");
@@ -1983,6 +2289,7 @@ static const struct atmel_nand_controller_ops atmel_hsmc_nc_ops = {
 	.remove = atmel_hsmc_nand_controller_remove,
 	.ecc_init = atmel_hsmc_nand_ecc_init,
 	.nand_init = atmel_hsmc_nand_init,
+	.setup_data_interface = atmel_hsmc_nand_setup_data_interface,
 };
 
 static const struct atmel_nand_controller_caps atmel_sama5_nc_caps = {
@@ -2037,7 +2344,14 @@ atmel_smc_nand_controller_remove(struct atmel_nand_controller *nc)
 	return 0;
 }
 
-static const struct atmel_nand_controller_ops atmel_smc_nc_ops = {
+/*
+ * The SMC reg layout of at91rm9200 is completely different which prevents us
+ * from re-using atmel_smc_nand_setup_data_interface() for the
+ * ->setup_data_interface() hook.
+ * At this point, there's no support for the at91rm9200 SMC IP, so we leave
+ * ->setup_data_interface() unassigned.
+ */
+static const struct atmel_nand_controller_ops at91rm9200_nc_ops = {
 	.probe = atmel_smc_nand_controller_probe,
 	.remove = atmel_smc_nand_controller_remove,
 	.ecc_init = atmel_nand_ecc_init,
@@ -2047,6 +2361,20 @@ static const struct atmel_nand_controller_ops atmel_smc_nc_ops = {
 static const struct atmel_nand_controller_caps atmel_rm9200_nc_caps = {
 	.ale_offs = BIT(21),
 	.cle_offs = BIT(22),
+	.ops = &at91rm9200_nc_ops,
+};
+
+static const struct atmel_nand_controller_ops atmel_smc_nc_ops = {
+	.probe = atmel_smc_nand_controller_probe,
+	.remove = atmel_smc_nand_controller_remove,
+	.ecc_init = atmel_nand_ecc_init,
+	.nand_init = atmel_smc_nand_init,
+	.setup_data_interface = atmel_smc_nand_setup_data_interface,
+};
+
+static const struct atmel_nand_controller_caps atmel_sam9260_nc_caps = {
+	.ale_offs = BIT(21),
+	.cle_offs = BIT(22),
 	.ops = &atmel_smc_nc_ops,
 };
 
@@ -2093,7 +2421,7 @@ static const struct of_device_id atmel_nand_controller_of_ids[] = {
 	},
 	{
 		.compatible = "atmel,at91sam9260-nand-controller",
-		.data = &atmel_rm9200_nc_caps,
+		.data = &atmel_sam9260_nc_caps,
 	},
 	{
 		.compatible = "atmel,at91sam9261-nand-controller",
@@ -2181,6 +2509,24 @@ static int atmel_nand_controller_remove(struct platform_device *pdev)
 	return nc->caps->ops->remove(nc);
 }
 
+static __maybe_unused int atmel_nand_controller_resume(struct device *dev)
+{
+	struct atmel_nand_controller *nc = dev_get_drvdata(dev);
+	struct atmel_nand *nand;
+
+	list_for_each_entry(nand, &nc->chips, node) {
+		int i;
+
+		for (i = 0; i < nand->numcs; i++)
+			nand_reset(&nand->base, i);
+	}
+	/* nand_reset() return values are ignored: resume always returns 0. */
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(atmel_nand_controller_pm_ops, NULL,
+			 atmel_nand_controller_resume);
+
 static struct platform_driver atmel_nand_controller_driver = {
 	.driver = {
 		.name = "atmel-nand-controller",

diff --git a/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c
index f1da4ea..54bac5b 100644
--- a/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c

@@ -392,6 +392,8 @@ int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n)
 	b47n->nand_chip.read_byte = bcm47xxnflash_ops_bcm4706_read_byte;
 	b47n->nand_chip.read_buf = bcm47xxnflash_ops_bcm4706_read_buf;
 	b47n->nand_chip.write_buf = bcm47xxnflash_ops_bcm4706_write_buf;
+	b47n->nand_chip.onfi_set_features = nand_onfi_get_set_features_notsupp;
+	b47n->nand_chip.onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	nand_chip->chip_delay = 50;
 	b47n->nand_chip.bbt_options = NAND_BBT_USE_FLASH;

diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index d40c32d..2fd733e 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c

@@ -654,6 +654,8 @@ static int cafe_nand_probe(struct pci_dev *pdev,
 	cafe->nand.read_buf = cafe_read_buf;
 	cafe->nand.write_buf = cafe_write_buf;
 	cafe->nand.select_chip = cafe_select_chip;
+	cafe->nand.onfi_set_features = nand_onfi_get_set_features_notsupp;
+	cafe->nand.onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	cafe->nand.chip_delay = 0;
 

diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index 531c519..7b26e53 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c

@@ -771,11 +771,14 @@ static int nand_davinci_probe(struct platform_device *pdev)
 			info->chip.ecc.hwctl = nand_davinci_hwctl_4bit;
 			info->chip.ecc.bytes = 10;
 			info->chip.ecc.options = NAND_ECC_GENERIC_ERASED_CHECK;
+			info->chip.ecc.algo = NAND_ECC_BCH;
 		} else {
+			/* 1bit ecc hamming */
 			info->chip.ecc.calculate = nand_davinci_calculate_1bit;
 			info->chip.ecc.correct = nand_davinci_correct_1bit;
 			info->chip.ecc.hwctl = nand_davinci_hwctl_1bit;
 			info->chip.ecc.bytes = 3;
+			info->chip.ecc.algo = NAND_ECC_HAMMING;
 		}
 		info->chip.ecc.size = 512;
 		info->chip.ecc.strength = pdata->ecc_bits;

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 16634df..d723be3 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c

@@ -23,50 +23,43 @@
 #include <linux/mutex.h>
 #include <linux/mtd/mtd.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 
 #include "denali.h"
 
 MODULE_LICENSE("GPL");
 
-/*
- * We define a module parameter that allows the user to override
- * the hardware and decide what timing mode should be used.
- */
-#define NAND_DEFAULT_TIMINGS	-1
-
-static int onfi_timing_mode = NAND_DEFAULT_TIMINGS;
-module_param(onfi_timing_mode, int, S_IRUGO);
-MODULE_PARM_DESC(onfi_timing_mode,
-	   "Overrides default ONFI setting. -1 indicates use default timings");
-
 #define DENALI_NAND_NAME    "denali-nand"
 
-/*
- * We define a macro here that combines all interrupts this driver uses into
- * a single constant value, for convenience.
- */
-#define DENALI_IRQ_ALL	(INTR__DMA_CMD_COMP | \
-			INTR__ECC_TRANSACTION_DONE | \
-			INTR__ECC_ERR | \
-			INTR__PROGRAM_FAIL | \
-			INTR__LOAD_COMP | \
-			INTR__PROGRAM_COMP | \
-			INTR__TIME_OUT | \
-			INTR__ERASE_FAIL | \
-			INTR__RST_COMP | \
-			INTR__ERASE_COMP)
+/* Host Data/Command Interface */
+#define DENALI_HOST_ADDR	0x00
+#define DENALI_HOST_DATA	0x10
+
+#define DENALI_MAP00		(0 << 26)	/* direct access to buffer */
+#define DENALI_MAP01		(1 << 26)	/* read/write pages in PIO */
+#define DENALI_MAP10		(2 << 26)	/* high-level control plane */
+#define DENALI_MAP11		(3 << 26)	/* direct controller access */
+
+/* MAP11 access cycle type */
+#define DENALI_MAP11_CMD	((DENALI_MAP11) | 0)	/* command cycle */
+#define DENALI_MAP11_ADDR	((DENALI_MAP11) | 1)	/* address cycle */
+#define DENALI_MAP11_DATA	((DENALI_MAP11) | 2)	/* data cycle */
+
+/* MAP10 commands */
+#define DENALI_ERASE		0x01
+
+#define DENALI_BANK(denali)	((denali)->active_bank << 24)
+
+#define DENALI_INVALID_BANK	-1
+#define DENALI_NR_BANKS		4
 
 /*
- * indicates whether or not the internal value for the flash bank is
- * valid or not
+ * The bus interface clock, clk_x, is phase aligned with the core clock.  The
+ * clk_x is an integral multiple N of the core clk.  The value N is configured
+ * at IP delivery time, and its available value is 4, 5, or 6.  We need to align
+ * to the largest value to make it work with any possible configuration.
  */
-#define CHIP_SELECT_INVALID	-1
-
-/*
- * This macro divides two integers and rounds fractional values up
- * to the nearest integer value.
- */
-#define CEIL_DIV(X, Y) (((X)%(Y)) ? ((X)/(Y)+1) : ((X)/(Y)))
+#define DENALI_CLK_X_MULT	6
 
 /*
  * this macro allows us to convert from an MTD structure to our own
@@ -77,339 +70,11 @@ static inline struct denali_nand_info *mtd_to_denali(struct mtd_info *mtd)
 	return container_of(mtd_to_nand(mtd), struct denali_nand_info, nand);
 }
 
-/*
- * These constants are defined by the driver to enable common driver
- * configuration options.
- */
-#define SPARE_ACCESS		0x41
-#define MAIN_ACCESS		0x42
-#define MAIN_SPARE_ACCESS	0x43
-
-#define DENALI_READ	0
-#define DENALI_WRITE	0x100
-
-/*
- * this is a helper macro that allows us to
- * format the bank into the proper bits for the controller
- */
-#define BANK(x) ((x) << 24)
-
-/* forward declarations */
-static void clear_interrupts(struct denali_nand_info *denali);
-static uint32_t wait_for_irq(struct denali_nand_info *denali,
-							uint32_t irq_mask);
-static void denali_irq_enable(struct denali_nand_info *denali,
-							uint32_t int_mask);
-static uint32_t read_interrupt_status(struct denali_nand_info *denali);
-
-/*
- * Certain operations for the denali NAND controller use an indexed mode to
- * read/write data. The operation is performed by writing the address value
- * of the command to the device memory followed by the data. This function
- * abstracts this common operation.
- */
-static void index_addr(struct denali_nand_info *denali,
-				uint32_t address, uint32_t data)
+static void denali_host_write(struct denali_nand_info *denali,
+			      uint32_t addr, uint32_t data)
 {
-	iowrite32(address, denali->flash_mem);
-	iowrite32(data, denali->flash_mem + 0x10);
-}
-
-/* Perform an indexed read of the device */
-static void index_addr_read_data(struct denali_nand_info *denali,
-				 uint32_t address, uint32_t *pdata)
-{
-	iowrite32(address, denali->flash_mem);
-	*pdata = ioread32(denali->flash_mem + 0x10);
-}
-
-/*
- * We need to buffer some data for some of the NAND core routines.
- * The operations manage buffering that data.
- */
-static void reset_buf(struct denali_nand_info *denali)
-{
-	denali->buf.head = denali->buf.tail = 0;
-}
-
-static void write_byte_to_buf(struct denali_nand_info *denali, uint8_t byte)
-{
-	denali->buf.buf[denali->buf.tail++] = byte;
-}
-
-/* reads the status of the device */
-static void read_status(struct denali_nand_info *denali)
-{
-	uint32_t cmd;
-
-	/* initialize the data buffer to store status */
-	reset_buf(denali);
-
-	cmd = ioread32(denali->flash_reg + WRITE_PROTECT);
-	if (cmd)
-		write_byte_to_buf(denali, NAND_STATUS_WP);
-	else
-		write_byte_to_buf(denali, 0);
-}
-
-/* resets a specific device connected to the core */
-static void reset_bank(struct denali_nand_info *denali)
-{
-	uint32_t irq_status;
-	uint32_t irq_mask = INTR__RST_COMP | INTR__TIME_OUT;
-
-	clear_interrupts(denali);
-
-	iowrite32(1 << denali->flash_bank, denali->flash_reg + DEVICE_RESET);
-
-	irq_status = wait_for_irq(denali, irq_mask);
-
-	if (irq_status & INTR__TIME_OUT)
-		dev_err(denali->dev, "reset bank failed.\n");
-}
-
-/* Reset the flash controller */
-static uint16_t denali_nand_reset(struct denali_nand_info *denali)
-{
-	int i;
-
-	for (i = 0; i < denali->max_banks; i++)
-		iowrite32(INTR__RST_COMP | INTR__TIME_OUT,
-		denali->flash_reg + INTR_STATUS(i));
-
-	for (i = 0; i < denali->max_banks; i++) {
-		iowrite32(1 << i, denali->flash_reg + DEVICE_RESET);
-		while (!(ioread32(denali->flash_reg + INTR_STATUS(i)) &
-			(INTR__RST_COMP | INTR__TIME_OUT)))
-			cpu_relax();
-		if (ioread32(denali->flash_reg + INTR_STATUS(i)) &
-			INTR__TIME_OUT)
-			dev_dbg(denali->dev,
-			"NAND Reset operation timed out on bank %d\n", i);
-	}
-
-	for (i = 0; i < denali->max_banks; i++)
-		iowrite32(INTR__RST_COMP | INTR__TIME_OUT,
-			  denali->flash_reg + INTR_STATUS(i));
-
-	return PASS;
-}
-
-/*
- * this routine calculates the ONFI timing values for a given mode and
- * programs the clocking register accordingly. The mode is determined by
- * the get_onfi_nand_para routine.
- */
-static void nand_onfi_timing_set(struct denali_nand_info *denali,
-								uint16_t mode)
-{
-	uint16_t Trea[6] = {40, 30, 25, 20, 20, 16};
-	uint16_t Trp[6] = {50, 25, 17, 15, 12, 10};
-	uint16_t Treh[6] = {30, 15, 15, 10, 10, 7};
-	uint16_t Trc[6] = {100, 50, 35, 30, 25, 20};
-	uint16_t Trhoh[6] = {0, 15, 15, 15, 15, 15};
-	uint16_t Trloh[6] = {0, 0, 0, 0, 5, 5};
-	uint16_t Tcea[6] = {100, 45, 30, 25, 25, 25};
-	uint16_t Tadl[6] = {200, 100, 100, 100, 70, 70};
-	uint16_t Trhw[6] = {200, 100, 100, 100, 100, 100};
-	uint16_t Trhz[6] = {200, 100, 100, 100, 100, 100};
-	uint16_t Twhr[6] = {120, 80, 80, 60, 60, 60};
-	uint16_t Tcs[6] = {70, 35, 25, 25, 20, 15};
-
-	uint16_t data_invalid_rhoh, data_invalid_rloh, data_invalid;
-	uint16_t dv_window = 0;
-	uint16_t en_lo, en_hi;
-	uint16_t acc_clks;
-	uint16_t addr_2_data, re_2_we, re_2_re, we_2_re, cs_cnt;
-
-	en_lo = CEIL_DIV(Trp[mode], CLK_X);
-	en_hi = CEIL_DIV(Treh[mode], CLK_X);
-#if ONFI_BLOOM_TIME
-	if ((en_hi * CLK_X) < (Treh[mode] + 2))
-		en_hi++;
-#endif
-
-	if ((en_lo + en_hi) * CLK_X < Trc[mode])
-		en_lo += CEIL_DIV((Trc[mode] - (en_lo + en_hi) * CLK_X), CLK_X);
-
-	if ((en_lo + en_hi) < CLK_MULTI)
-		en_lo += CLK_MULTI - en_lo - en_hi;
-
-	while (dv_window < 8) {
-		data_invalid_rhoh = en_lo * CLK_X + Trhoh[mode];
-
-		data_invalid_rloh = (en_lo + en_hi) * CLK_X + Trloh[mode];
-
-		data_invalid = data_invalid_rhoh < data_invalid_rloh ?
-					data_invalid_rhoh : data_invalid_rloh;
-
-		dv_window = data_invalid - Trea[mode];
-
-		if (dv_window < 8)
-			en_lo++;
-	}
-
-	acc_clks = CEIL_DIV(Trea[mode], CLK_X);
-
-	while (acc_clks * CLK_X - Trea[mode] < 3)
-		acc_clks++;
-
-	if (data_invalid - acc_clks * CLK_X < 2)
-		dev_warn(denali->dev, "%s, Line %d: Warning!\n",
-			 __FILE__, __LINE__);
-
-	addr_2_data = CEIL_DIV(Tadl[mode], CLK_X);
-	re_2_we = CEIL_DIV(Trhw[mode], CLK_X);
-	re_2_re = CEIL_DIV(Trhz[mode], CLK_X);
-	we_2_re = CEIL_DIV(Twhr[mode], CLK_X);
-	cs_cnt = CEIL_DIV((Tcs[mode] - Trp[mode]), CLK_X);
-	if (cs_cnt == 0)
-		cs_cnt = 1;
-
-	if (Tcea[mode]) {
-		while (cs_cnt * CLK_X + Trea[mode] < Tcea[mode])
-			cs_cnt++;
-	}
-
-#if MODE5_WORKAROUND
-	if (mode == 5)
-		acc_clks = 5;
-#endif
-
-	/* Sighting 3462430: Temporary hack for MT29F128G08CJABAWP:B */
-	if (ioread32(denali->flash_reg + MANUFACTURER_ID) == 0 &&
-		ioread32(denali->flash_reg + DEVICE_ID) == 0x88)
-		acc_clks = 6;
-
-	iowrite32(acc_clks, denali->flash_reg + ACC_CLKS);
-	iowrite32(re_2_we, denali->flash_reg + RE_2_WE);
-	iowrite32(re_2_re, denali->flash_reg + RE_2_RE);
-	iowrite32(we_2_re, denali->flash_reg + WE_2_RE);
-	iowrite32(addr_2_data, denali->flash_reg + ADDR_2_DATA);
-	iowrite32(en_lo, denali->flash_reg + RDWR_EN_LO_CNT);
-	iowrite32(en_hi, denali->flash_reg + RDWR_EN_HI_CNT);
-	iowrite32(cs_cnt, denali->flash_reg + CS_SETUP_CNT);
-}
-
-/* queries the NAND device to see what ONFI modes it supports. */
-static uint16_t get_onfi_nand_para(struct denali_nand_info *denali)
-{
-	int i;
-
-	/*
-	 * we needn't to do a reset here because driver has already
-	 * reset all the banks before
-	 */
-	if (!(ioread32(denali->flash_reg + ONFI_TIMING_MODE) &
-		ONFI_TIMING_MODE__VALUE))
-		return FAIL;
-
-	for (i = 5; i > 0; i--) {
-		if (ioread32(denali->flash_reg + ONFI_TIMING_MODE) &
-			(0x01 << i))
-			break;
-	}
-
-	nand_onfi_timing_set(denali, i);
-
-	/*
-	 * By now, all the ONFI devices we know support the page cache
-	 * rw feature. So here we enable the pipeline_rw_ahead feature
-	 */
-	/* iowrite32(1, denali->flash_reg + CACHE_WRITE_ENABLE); */
-	/* iowrite32(1, denali->flash_reg + CACHE_READ_ENABLE);  */
-
-	return PASS;
-}
-
-static void get_samsung_nand_para(struct denali_nand_info *denali,
-							uint8_t device_id)
-{
-	if (device_id == 0xd3) { /* Samsung K9WAG08U1A */
-		/* Set timing register values according to datasheet */
-		iowrite32(5, denali->flash_reg + ACC_CLKS);
-		iowrite32(20, denali->flash_reg + RE_2_WE);
-		iowrite32(12, denali->flash_reg + WE_2_RE);
-		iowrite32(14, denali->flash_reg + ADDR_2_DATA);
-		iowrite32(3, denali->flash_reg + RDWR_EN_LO_CNT);
-		iowrite32(2, denali->flash_reg + RDWR_EN_HI_CNT);
-		iowrite32(2, denali->flash_reg + CS_SETUP_CNT);
-	}
-}
-
-static void get_toshiba_nand_para(struct denali_nand_info *denali)
-{
-	/*
-	 * Workaround to fix a controller bug which reports a wrong
-	 * spare area size for some kind of Toshiba NAND device
-	 */
-	if ((ioread32(denali->flash_reg + DEVICE_MAIN_AREA_SIZE) == 4096) &&
-		(ioread32(denali->flash_reg + DEVICE_SPARE_AREA_SIZE) == 64))
-		iowrite32(216, denali->flash_reg + DEVICE_SPARE_AREA_SIZE);
-}
-
-static void get_hynix_nand_para(struct denali_nand_info *denali,
-							uint8_t device_id)
-{
-	switch (device_id) {
-	case 0xD5: /* Hynix H27UAG8T2A, H27UBG8U5A or H27UCG8VFA */
-	case 0xD7: /* Hynix H27UDG8VEM, H27UCG8UDM or H27UCG8V5A */
-		iowrite32(128, denali->flash_reg + PAGES_PER_BLOCK);
-		iowrite32(4096, denali->flash_reg + DEVICE_MAIN_AREA_SIZE);
-		iowrite32(224, denali->flash_reg + DEVICE_SPARE_AREA_SIZE);
-		iowrite32(0, denali->flash_reg + DEVICE_WIDTH);
-		break;
-	default:
-		dev_warn(denali->dev,
-			 "Unknown Hynix NAND (Device ID: 0x%x).\n"
-			 "Will use default parameter values instead.\n",
-			 device_id);
-	}
-}
-
-/*
- * determines how many NAND chips are connected to the controller. Note for
- * Intel CE4100 devices we don't support more than one device.
- */
-static void find_valid_banks(struct denali_nand_info *denali)
-{
-	uint32_t id[denali->max_banks];
-	int i;
-
-	denali->total_used_banks = 1;
-	for (i = 0; i < denali->max_banks; i++) {
-		index_addr(denali, MODE_11 | (i << 24) | 0, 0x90);
-		index_addr(denali, MODE_11 | (i << 24) | 1, 0);
-		index_addr_read_data(denali, MODE_11 | (i << 24) | 2, &id[i]);
-
-		dev_dbg(denali->dev,
-			"Return 1st ID for bank[%d]: %x\n", i, id[i]);
-
-		if (i == 0) {
-			if (!(id[i] & 0x0ff))
-				break; /* WTF? */
-		} else {
-			if ((id[i] & 0x0ff) == (id[0] & 0x0ff))
-				denali->total_used_banks++;
-			else
-				break;
-		}
-	}
-
-	if (denali->platform == INTEL_CE4100) {
-		/*
-		 * Platform limitations of the CE4100 device limit
-		 * users to a single chip solution for NAND.
-		 * Multichip support is not enabled.
-		 */
-		if (denali->total_used_banks != 1) {
-			dev_err(denali->dev,
-				"Sorry, Intel CE4100 only supports a single NAND device.\n");
-			BUG();
-		}
-	}
-	dev_dbg(denali->dev,
-		"denali->total_used_banks: %d\n", denali->total_used_banks);
+	iowrite32(addr, denali->host + DENALI_HOST_ADDR);
+	iowrite32(data, denali->host + DENALI_HOST_DATA);
 }
 
 /*
@@ -418,7 +83,7 @@ static void find_valid_banks(struct denali_nand_info *denali)
  */
 static void detect_max_banks(struct denali_nand_info *denali)
 {
-	uint32_t features = ioread32(denali->flash_reg + FEATURES);
+	uint32_t features = ioread32(denali->reg + FEATURES);
 
 	denali->max_banks = 1 << (features & FEATURES__N_BANKS);
 
@@ -427,227 +92,120 @@ static void detect_max_banks(struct denali_nand_info *denali)
 		denali->max_banks <<= 1;
 }
 
-static uint16_t denali_nand_timing_set(struct denali_nand_info *denali)
-{
-	uint16_t status = PASS;
-	uint32_t id_bytes[8], addr;
-	uint8_t maf_id, device_id;
-	int i;
-
-	/*
-	 * Use read id method to get device ID and other params.
-	 * For some NAND chips, controller can't report the correct
-	 * device ID by reading from DEVICE_ID register
-	 */
-	addr = MODE_11 | BANK(denali->flash_bank);
-	index_addr(denali, addr | 0, 0x90);
-	index_addr(denali, addr | 1, 0);
-	for (i = 0; i < 8; i++)
-		index_addr_read_data(denali, addr | 2, &id_bytes[i]);
-	maf_id = id_bytes[0];
-	device_id = id_bytes[1];
-
-	if (ioread32(denali->flash_reg + ONFI_DEVICE_NO_OF_LUNS) &
-		ONFI_DEVICE_NO_OF_LUNS__ONFI_DEVICE) { /* ONFI 1.0 NAND */
-		if (FAIL == get_onfi_nand_para(denali))
-			return FAIL;
-	} else if (maf_id == 0xEC) { /* Samsung NAND */
-		get_samsung_nand_para(denali, device_id);
-	} else if (maf_id == 0x98) { /* Toshiba NAND */
-		get_toshiba_nand_para(denali);
-	} else if (maf_id == 0xAD) { /* Hynix NAND */
-		get_hynix_nand_para(denali, device_id);
-	}
-
-	dev_info(denali->dev,
-			"Dump timing register values:\n"
-			"acc_clks: %d, re_2_we: %d, re_2_re: %d\n"
-			"we_2_re: %d, addr_2_data: %d, rdwr_en_lo_cnt: %d\n"
-			"rdwr_en_hi_cnt: %d, cs_setup_cnt: %d\n",
-			ioread32(denali->flash_reg + ACC_CLKS),
-			ioread32(denali->flash_reg + RE_2_WE),
-			ioread32(denali->flash_reg + RE_2_RE),
-			ioread32(denali->flash_reg + WE_2_RE),
-			ioread32(denali->flash_reg + ADDR_2_DATA),
-			ioread32(denali->flash_reg + RDWR_EN_LO_CNT),
-			ioread32(denali->flash_reg + RDWR_EN_HI_CNT),
-			ioread32(denali->flash_reg + CS_SETUP_CNT));
-
-	find_valid_banks(denali);
-
-	/*
-	 * If the user specified to override the default timings
-	 * with a specific ONFI mode, we apply those changes here.
-	 */
-	if (onfi_timing_mode != NAND_DEFAULT_TIMINGS)
-		nand_onfi_timing_set(denali, onfi_timing_mode);
-
-	return status;
-}
-
-static void denali_set_intr_modes(struct denali_nand_info *denali,
-					uint16_t INT_ENABLE)
-{
-	if (INT_ENABLE)
-		iowrite32(1, denali->flash_reg + GLOBAL_INT_ENABLE);
-	else
-		iowrite32(0, denali->flash_reg + GLOBAL_INT_ENABLE);
-}
-
-/*
- * validation function to verify that the controlling software is making
- * a valid request
- */
-static inline bool is_flash_bank_valid(int flash_bank)
-{
-	return flash_bank >= 0 && flash_bank < 4;
-}
-
-static void denali_irq_init(struct denali_nand_info *denali)
-{
-	uint32_t int_mask;
-	int i;
-
-	/* Disable global interrupts */
-	denali_set_intr_modes(denali, false);
-
-	int_mask = DENALI_IRQ_ALL;
-
-	/* Clear all status bits */
-	for (i = 0; i < denali->max_banks; ++i)
-		iowrite32(0xFFFF, denali->flash_reg + INTR_STATUS(i));
-
-	denali_irq_enable(denali, int_mask);
-}
-
-static void denali_irq_cleanup(int irqnum, struct denali_nand_info *denali)
-{
-	denali_set_intr_modes(denali, false);
-}
-
-static void denali_irq_enable(struct denali_nand_info *denali,
-							uint32_t int_mask)
+static void denali_enable_irq(struct denali_nand_info *denali)
 {
 	int i;
 
-	for (i = 0; i < denali->max_banks; ++i)
-		iowrite32(int_mask, denali->flash_reg + INTR_EN(i));
+	for (i = 0; i < DENALI_NR_BANKS; i++)
+		iowrite32(U32_MAX, denali->reg + INTR_EN(i));
+	iowrite32(GLOBAL_INT_EN_FLAG, denali->reg + GLOBAL_INT_ENABLE);
 }
 
-/*
- * This function only returns when an interrupt that this driver cares about
- * occurs. This is to reduce the overhead of servicing interrupts
- */
-static inline uint32_t denali_irq_detected(struct denali_nand_info *denali)
+static void denali_disable_irq(struct denali_nand_info *denali)
 {
-	return read_interrupt_status(denali) & DENALI_IRQ_ALL;
+	int i;
+
+	for (i = 0; i < DENALI_NR_BANKS; i++)
+		iowrite32(0, denali->reg + INTR_EN(i));
+	iowrite32(0, denali->reg + GLOBAL_INT_ENABLE);
 }
 
-/* Interrupts are cleared by writing a 1 to the appropriate status bit */
-static inline void clear_interrupt(struct denali_nand_info *denali,
-							uint32_t irq_mask)
+static void denali_clear_irq(struct denali_nand_info *denali,
+			     int bank, uint32_t irq_status)
 {
-	uint32_t intr_status_reg;
-
-	intr_status_reg = INTR_STATUS(denali->flash_bank);
-
-	iowrite32(irq_mask, denali->flash_reg + intr_status_reg);
+	/* write one to clear bits */
+	iowrite32(irq_status, denali->reg + INTR_STATUS(bank));
 }
 
-static void clear_interrupts(struct denali_nand_info *denali)
+static void denali_clear_irq_all(struct denali_nand_info *denali)
 {
-	uint32_t status;
+	int i;
 
-	spin_lock_irq(&denali->irq_lock);
-
-	status = read_interrupt_status(denali);
-	clear_interrupt(denali, status);
-
-	denali->irq_status = 0x0;
-	spin_unlock_irq(&denali->irq_lock);
+	for (i = 0; i < DENALI_NR_BANKS; i++)
+		denali_clear_irq(denali, i, U32_MAX);
 }
 
-static uint32_t read_interrupt_status(struct denali_nand_info *denali)
-{
-	uint32_t intr_status_reg;
-
-	intr_status_reg = INTR_STATUS(denali->flash_bank);
-
-	return ioread32(denali->flash_reg + intr_status_reg);
-}
-
-/*
- * This is the interrupt service routine. It handles all interrupts
- * sent to this device. Note that on CE4100, this is a shared interrupt.
- */
 static irqreturn_t denali_isr(int irq, void *dev_id)
 {
 	struct denali_nand_info *denali = dev_id;
+	irqreturn_t ret = IRQ_NONE;
 	uint32_t irq_status;
-	irqreturn_t result = IRQ_NONE;
+	int i;
 
 	spin_lock(&denali->irq_lock);
 
-	/* check to see if a valid NAND chip has been selected. */
-	if (is_flash_bank_valid(denali->flash_bank)) {
-		/*
-		 * check to see if controller generated the interrupt,
-		 * since this is a shared interrupt
-		 */
-		irq_status = denali_irq_detected(denali);
-		if (irq_status != 0) {
-			/* handle interrupt */
-			/* first acknowledge it */
-			clear_interrupt(denali, irq_status);
-			/*
-			 * store the status in the device context for someone
-			 * to read
-			 */
-			denali->irq_status |= irq_status;
-			/* notify anyone who cares that it happened */
+	for (i = 0; i < DENALI_NR_BANKS; i++) {
+		irq_status = ioread32(denali->reg + INTR_STATUS(i));
+		if (irq_status)
+			ret = IRQ_HANDLED;
+
+		denali_clear_irq(denali, i, irq_status);
+
+		if (i != denali->active_bank)
+			continue;
+
+		denali->irq_status |= irq_status;
+
+		if (denali->irq_status & denali->irq_mask)
 			complete(&denali->complete);
-			/* tell the OS that we've handled this */
-			result = IRQ_HANDLED;
-		}
 	}
+
 	spin_unlock(&denali->irq_lock);
-	return result;
+
+	return ret;
 }
 
-static uint32_t wait_for_irq(struct denali_nand_info *denali, uint32_t irq_mask)
+static void denali_reset_irq(struct denali_nand_info *denali)
 {
-	unsigned long comp_res;
-	uint32_t intr_status;
-	unsigned long timeout = msecs_to_jiffies(1000);
+	unsigned long flags;
 
-	do {
-		comp_res =
-			wait_for_completion_timeout(&denali->complete, timeout);
-		spin_lock_irq(&denali->irq_lock);
-		intr_status = denali->irq_status;
+	spin_lock_irqsave(&denali->irq_lock, flags);
+	denali->irq_status = 0;
+	denali->irq_mask = 0;
+	spin_unlock_irqrestore(&denali->irq_lock, flags);
+}
 
-		if (intr_status & irq_mask) {
-			denali->irq_status &= ~irq_mask;
-			spin_unlock_irq(&denali->irq_lock);
-			/* our interrupt was detected */
-			break;
-		}
+static uint32_t denali_wait_for_irq(struct denali_nand_info *denali,
+				    uint32_t irq_mask)
+{
+	unsigned long time_left, flags;
+	uint32_t irq_status;
 
-		/*
-		 * these are not the interrupts you are looking for -
-		 * need to wait again
-		 */
-		spin_unlock_irq(&denali->irq_lock);
-	} while (comp_res != 0);
+	spin_lock_irqsave(&denali->irq_lock, flags);
 
-	if (comp_res == 0) {
-		/* timeout */
-		pr_err("timeout occurred, status = 0x%x, mask = 0x%x\n",
-				intr_status, irq_mask);
+	irq_status = denali->irq_status;
 
-		intr_status = 0;
+	if (irq_mask & irq_status) {
+		/* return immediately if the IRQ has already happened. */
+		spin_unlock_irqrestore(&denali->irq_lock, flags);
+		return irq_status;
 	}
-	return intr_status;
+
+	denali->irq_mask = irq_mask;
+	reinit_completion(&denali->complete);
+	spin_unlock_irqrestore(&denali->irq_lock, flags);
+
+	time_left = wait_for_completion_timeout(&denali->complete,
+						msecs_to_jiffies(1000));
+	if (!time_left) {
+		dev_err(denali->dev, "timeout while waiting for irq 0x%x\n",
+			denali->irq_mask);
+		return 0;
+	}
+
+	return denali->irq_status;
+}
+
+/*
+ * Return a snapshot of the accumulated denali->irq_status.
+ *
+ * The field is read while holding irq_lock; it is not modified here, so the
+ * same status bits remain visible to a later caller.
+ */
+static uint32_t denali_check_irq(struct denali_nand_info *denali)
+{
+	unsigned long flags;
+	uint32_t irq_status;
+
+	spin_lock_irqsave(&denali->irq_lock, flags);
+	irq_status = denali->irq_status;
+	spin_unlock_irqrestore(&denali->irq_lock, flags);
+
+	return irq_status;
 }
 
 /*
@@ -664,153 +222,111 @@ static void setup_ecc_for_xfer(struct denali_nand_info *denali, bool ecc_en,
 	transfer_spare_flag = transfer_spare ? TRANSFER_SPARE_REG__FLAG : 0;
 
 	/* Enable spare area/ECC per user's request. */
-	iowrite32(ecc_en_flag, denali->flash_reg + ECC_ENABLE);
-	iowrite32(transfer_spare_flag, denali->flash_reg + TRANSFER_SPARE_REG);
+	iowrite32(ecc_en_flag, denali->reg + ECC_ENABLE);
+	iowrite32(transfer_spare_flag, denali->reg + TRANSFER_SPARE_REG);
 }
 
-/*
- * sends a pipeline command operation to the controller. See the Denali NAND
- * controller's user guide for more information (section 4.2.3.6).
- */
-static int denali_send_pipeline_cmd(struct denali_nand_info *denali,
-				    bool ecc_en, bool transfer_spare,
-				    int access_type, int op)
-{
-	int status = PASS;
-	uint32_t addr, cmd;
-
-	setup_ecc_for_xfer(denali, ecc_en, transfer_spare);
-
-	clear_interrupts(denali);
-
-	addr = BANK(denali->flash_bank) | denali->page;
-
-	if (op == DENALI_WRITE && access_type != SPARE_ACCESS) {
-		cmd = MODE_01 | addr;
-		iowrite32(cmd, denali->flash_mem);
-	} else if (op == DENALI_WRITE && access_type == SPARE_ACCESS) {
-		/* read spare area */
-		cmd = MODE_10 | addr;
-		index_addr(denali, cmd, access_type);
-
-		cmd = MODE_01 | addr;
-		iowrite32(cmd, denali->flash_mem);
-	} else if (op == DENALI_READ) {
-		/* setup page read request for access type */
-		cmd = MODE_10 | addr;
-		index_addr(denali, cmd, access_type);
-
-		cmd = MODE_01 | addr;
-		iowrite32(cmd, denali->flash_mem);
-	}
-	return status;
-}
-
-/* helper function that simply writes a buffer to the flash */
-static int write_data_to_flash_mem(struct denali_nand_info *denali,
-				   const uint8_t *buf, int len)
-{
-	uint32_t *buf32;
-	int i;
-
-	/*
-	 * verify that the len is a multiple of 4.
-	 * see comment in read_data_from_flash_mem()
-	 */
-	BUG_ON((len % 4) != 0);
-
-	/* write the data to the flash memory */
-	buf32 = (uint32_t *)buf;
-	for (i = 0; i < len / 4; i++)
-		iowrite32(*buf32++, denali->flash_mem + 0x10);
-	return i * 4; /* intent is to return the number of bytes read */
-}
-
-/* helper function that simply reads a buffer from the flash */
-static int read_data_from_flash_mem(struct denali_nand_info *denali,
-				    uint8_t *buf, int len)
-{
-	uint32_t *buf32;
-	int i;
-
-	/*
-	 * we assume that len will be a multiple of 4, if not it would be nice
-	 * to know about it ASAP rather than have random failures...
-	 * This assumption is based on the fact that this function is designed
-	 * to be used to read flash pages, which are typically multiples of 4.
-	 */
-	BUG_ON((len % 4) != 0);
-
-	/* transfer the data from the flash */
-	buf32 = (uint32_t *)buf;
-	for (i = 0; i < len / 4; i++)
-		*buf32++ = ioread32(denali->flash_mem + 0x10);
-	return i * 4; /* intent is to return the number of bytes read */
-}
-
-/* writes OOB data to the device */
-static int write_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
+static void denali_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	uint32_t irq_status;
-	uint32_t irq_mask = INTR__PROGRAM_COMP | INTR__PROGRAM_FAIL;
-	int status = 0;
+	int i;
 
-	denali->page = page;
+	iowrite32(DENALI_MAP11_DATA | DENALI_BANK(denali),
+		  denali->host + DENALI_HOST_ADDR);
 
-	if (denali_send_pipeline_cmd(denali, false, false, SPARE_ACCESS,
-							DENALI_WRITE) == PASS) {
-		write_data_to_flash_mem(denali, buf, mtd->oobsize);
-
-		/* wait for operation to complete */
-		irq_status = wait_for_irq(denali, irq_mask);
-
-		if (irq_status == 0) {
-			dev_err(denali->dev, "OOB write failed\n");
-			status = -EIO;
-		}
-	} else {
-		dev_err(denali->dev, "unable to send pipeline command\n");
-		status = -EIO;
-	}
-	return status;
+	for (i = 0; i < len; i++)
+		buf[i] = ioread32(denali->host + DENALI_HOST_DATA);
 }
 
-/* reads OOB data from the device */
-static void read_oob_data(struct mtd_info *mtd, uint8_t *buf, int page)
+static void denali_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	uint32_t irq_mask = INTR__LOAD_COMP;
-	uint32_t irq_status, addr, cmd;
+	int i;
 
-	denali->page = page;
+	iowrite32(DENALI_MAP11_DATA | DENALI_BANK(denali),
+		  denali->host + DENALI_HOST_ADDR);
 
-	if (denali_send_pipeline_cmd(denali, false, true, SPARE_ACCESS,
-							DENALI_READ) == PASS) {
-		read_data_from_flash_mem(denali, buf, mtd->oobsize);
+	for (i = 0; i < len; i++)
+		iowrite32(buf[i], denali->host + DENALI_HOST_DATA);
+}
 
-		/*
-		 * wait for command to be accepted
-		 * can always use status0 bit as the
-		 * mask is identical for each bank.
-		 */
-		irq_status = wait_for_irq(denali, irq_mask);
+static void denali_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	uint16_t *buf16 = (uint16_t *)buf;
+	int i;
 
-		if (irq_status == 0)
-			dev_err(denali->dev, "page on OOB timeout %d\n",
-					denali->page);
+	iowrite32(DENALI_MAP11_DATA | DENALI_BANK(denali),
+		  denali->host + DENALI_HOST_ADDR);
 
-		/*
-		 * We set the device back to MAIN_ACCESS here as I observed
-		 * instability with the controller if you do a block erase
-		 * and the last transaction was a SPARE_ACCESS. Block erase
-		 * is reliable (according to the MTD test infrastructure)
-		 * if you are in MAIN_ACCESS.
-		 */
-		addr = BANK(denali->flash_bank) | denali->page;
-		cmd = MODE_10 | addr;
-		index_addr(denali, cmd, MAIN_ACCESS);
-	}
+	for (i = 0; i < len / 2; i++)
+		buf16[i] = ioread32(denali->host + DENALI_HOST_DATA);
+}
+
+/*
+ * Write the contents of @buf to the controller as 16-bit words.
+ *
+ * @buf is reinterpreted as an array of uint16_t and len / 2 words are
+ * written, so an odd trailing byte of @len is not transferred.
+ */
+static void denali_write_buf16(struct mtd_info *mtd, const uint8_t *buf,
+			       int len)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	const uint16_t *buf16 = (const uint16_t *)buf;
+	int i;
+
+	/* program the host address once with the MAP11 data access type */
+	iowrite32(DENALI_MAP11_DATA | DENALI_BANK(denali),
+		  denali->host + DENALI_HOST_ADDR);
+
+	/* each 32-bit write to the host data register carries one word */
+	for (i = 0; i < len / 2; i++)
+		iowrite32(buf16[i], denali->host + DENALI_HOST_DATA);
+}
+
+/* Read a single byte by issuing a one-byte denali_read_buf() transfer. */
+static uint8_t denali_read_byte(struct mtd_info *mtd)
+{
+	uint8_t byte;
+
+	denali_read_buf(mtd, &byte, 1);
+
+	return byte;
+}
+
+/* Write a single byte by issuing a one-byte denali_write_buf() transfer. */
+static void denali_write_byte(struct mtd_info *mtd, uint8_t byte)
+{
+	denali_write_buf(mtd, &byte, 1);
+}
+
+/* Read one 16-bit word via denali_read_buf16() with a length of two bytes. */
+static uint16_t denali_read_word(struct mtd_info *mtd)
+{
+	uint16_t word;
+
+	denali_read_buf16(mtd, (uint8_t *)&word, 2);
+
+	return word;
+}
+
+/*
+ * Issue one command or address cycle through the host interface.
+ *
+ * @dat is sent as a command when NAND_CLE is set in @ctrl and as an address
+ * when NAND_ALE is set (NAND_CLE is checked first).  When neither flag is
+ * set, nothing is written and the function returns immediately.
+ */
+static void denali_cmd_ctrl(struct mtd_info *mtd, int dat, unsigned int ctrl)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	uint32_t type;
+
+	/* pick the MAP11 access type matching the requested latch */
+	if (ctrl & NAND_CLE)
+		type = DENALI_MAP11_CMD;
+	else if (ctrl & NAND_ALE)
+		type = DENALI_MAP11_ADDR;
+	else
+		return;
+
+	/*
+	 * Some commands are followed by chip->dev_ready or chip->waitfunc.
+	 * irq_status must be cleared here to catch the R/B# interrupt later.
+	 */
+	if (ctrl & NAND_CTRL_CHANGE)
+		denali_reset_irq(denali);
+
+	denali_host_write(denali, DENALI_BANK(denali) | type, dat);
+}
+
+/*
+ * Return 1 if INTR__INT_ACT is set in the IRQ status reported by
+ * denali_check_irq(), 0 otherwise.
+ */
+static int denali_dev_ready(struct mtd_info *mtd)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+
+	return !!(denali_check_irq(denali) & INTR__INT_ACT);
 }
 
 static int denali_check_erased_page(struct mtd_info *mtd,
@@ -856,11 +372,11 @@ static int denali_hw_ecc_fixup(struct mtd_info *mtd,
 			       unsigned long *uncor_ecc_flags)
 {
 	struct nand_chip *chip = mtd_to_nand(mtd);
-	int bank = denali->flash_bank;
+	int bank = denali->active_bank;
 	uint32_t ecc_cor;
 	unsigned int max_bitflips;
 
-	ecc_cor = ioread32(denali->flash_reg + ECC_COR_INFO(bank));
+	ecc_cor = ioread32(denali->reg + ECC_COR_INFO(bank));
 	ecc_cor >>= ECC_COR_INFO__SHIFT(bank);
 
 	if (ecc_cor & ECC_COR_INFO__UNCOR_ERR) {
@@ -886,8 +402,6 @@ static int denali_hw_ecc_fixup(struct mtd_info *mtd,
 	return max_bitflips;
 }
 
-#define ECC_SECTOR_SIZE 512
-
 #define ECC_SECTOR(x)	(((x) & ECC_ERROR_ADDRESS__SECTOR_NR) >> 12)
 #define ECC_BYTE(x)	(((x) & ECC_ERROR_ADDRESS__OFFSET))
 #define ECC_CORRECTION_VALUE(x) ((x) & ERR_CORRECTION_INFO__BYTEMASK)
@@ -899,22 +413,23 @@ static int denali_sw_ecc_fixup(struct mtd_info *mtd,
 			       struct denali_nand_info *denali,
 			       unsigned long *uncor_ecc_flags, uint8_t *buf)
 {
+	unsigned int ecc_size = denali->nand.ecc.size;
 	unsigned int bitflips = 0;
 	unsigned int max_bitflips = 0;
 	uint32_t err_addr, err_cor_info;
 	unsigned int err_byte, err_sector, err_device;
 	uint8_t err_cor_value;
 	unsigned int prev_sector = 0;
+	uint32_t irq_status;
 
-	/* read the ECC errors. we'll ignore them for now */
-	denali_set_intr_modes(denali, false);
+	denali_reset_irq(denali);
 
 	do {
-		err_addr = ioread32(denali->flash_reg + ECC_ERROR_ADDRESS);
+		err_addr = ioread32(denali->reg + ECC_ERROR_ADDRESS);
 		err_sector = ECC_SECTOR(err_addr);
 		err_byte = ECC_BYTE(err_addr);
 
-		err_cor_info = ioread32(denali->flash_reg + ERR_CORRECTION_INFO);
+		err_cor_info = ioread32(denali->reg + ERR_CORRECTION_INFO);
 		err_cor_value = ECC_CORRECTION_VALUE(err_cor_info);
 		err_device = ECC_ERR_DEVICE(err_cor_info);
 
@@ -928,9 +443,9 @@ static int denali_sw_ecc_fixup(struct mtd_info *mtd,
 			 * an erased sector.
 			 */
 			*uncor_ecc_flags |= BIT(err_sector);
-		} else if (err_byte < ECC_SECTOR_SIZE) {
+		} else if (err_byte < ecc_size) {
 			/*
-			 * If err_byte is larger than ECC_SECTOR_SIZE, means error
+			 * If err_byte is larger than ecc_size, means error
 			 * happened in OOB, so we ignore it. It's no need for
 			 * us to correct it err_device is represented the NAND
 			 * error bits are happened in if there are more than
@@ -939,8 +454,8 @@ static int denali_sw_ecc_fixup(struct mtd_info *mtd,
 			int offset;
 			unsigned int flips_in_byte;
 
-			offset = (err_sector * ECC_SECTOR_SIZE + err_byte) *
-						denali->devnum + err_device;
+			offset = (err_sector * ecc_size + err_byte) *
+					denali->devs_per_cs + err_device;
 
 			/* correct the ECC error */
 			flips_in_byte = hweight8(buf[offset] ^ err_cor_value);
@@ -959,10 +474,9 @@ static int denali_sw_ecc_fixup(struct mtd_info *mtd,
 	 * ECC_TRANSACTION_DONE interrupt, so here just wait for
 	 * a while for this interrupt
 	 */
-	while (!(read_interrupt_status(denali) & INTR__ECC_TRANSACTION_DONE))
-		cpu_relax();
-	clear_interrupts(denali);
-	denali_set_intr_modes(denali, true);
+	irq_status = denali_wait_for_irq(denali, INTR__ECC_TRANSACTION_DONE);
+	if (!(irq_status & INTR__ECC_TRANSACTION_DONE))
+		return -EIO;
 
 	return max_bitflips;
 }
@@ -970,17 +484,17 @@ static int denali_sw_ecc_fixup(struct mtd_info *mtd,
 /* programs the controller to either enable/disable DMA transfers */
 static void denali_enable_dma(struct denali_nand_info *denali, bool en)
 {
-	iowrite32(en ? DMA_ENABLE__FLAG : 0, denali->flash_reg + DMA_ENABLE);
-	ioread32(denali->flash_reg + DMA_ENABLE);
+	iowrite32(en ? DMA_ENABLE__FLAG : 0, denali->reg + DMA_ENABLE);
+	ioread32(denali->reg + DMA_ENABLE);
 }
 
-static void denali_setup_dma64(struct denali_nand_info *denali, int op)
+static void denali_setup_dma64(struct denali_nand_info *denali,
+			       dma_addr_t dma_addr, int page, int write)
 {
 	uint32_t mode;
 	const int page_count = 1;
-	uint64_t addr = denali->buf.dma_buf;
 
-	mode = MODE_10 | BANK(denali->flash_bank) | denali->page;
+	mode = DENALI_MAP10 | DENALI_BANK(denali) | page;
 
 	/* DMA is a three step process */
 
@@ -988,191 +502,354 @@ static void denali_setup_dma64(struct denali_nand_info *denali, int op)
 	 * 1. setup transfer type, interrupt when complete,
 	 *    burst len = 64 bytes, the number of pages
 	 */
-	index_addr(denali, mode, 0x01002000 | (64 << 16) | op | page_count);
+	denali_host_write(denali, mode,
+			  0x01002000 | (64 << 16) | (write << 8) | page_count);
 
 	/* 2. set memory low address */
-	index_addr(denali, mode, addr);
+	denali_host_write(denali, mode, dma_addr);
 
 	/* 3. set memory high address */
-	index_addr(denali, mode, addr >> 32);
+	denali_host_write(denali, mode, (uint64_t)dma_addr >> 32);
 }
 
-static void denali_setup_dma32(struct denali_nand_info *denali, int op)
+static void denali_setup_dma32(struct denali_nand_info *denali,
+			       dma_addr_t dma_addr, int page, int write)
 {
 	uint32_t mode;
 	const int page_count = 1;
-	uint32_t addr = denali->buf.dma_buf;
 
-	mode = MODE_10 | BANK(denali->flash_bank);
+	mode = DENALI_MAP10 | DENALI_BANK(denali);
 
 	/* DMA is a four step process */
 
 	/* 1. setup transfer type and # of pages */
-	index_addr(denali, mode | denali->page, 0x2000 | op | page_count);
+	denali_host_write(denali, mode | page,
+			  0x2000 | (write << 8) | page_count);
 
 	/* 2. set memory high address bits 23:8 */
-	index_addr(denali, mode | ((addr >> 16) << 8), 0x2200);
+	denali_host_write(denali, mode | ((dma_addr >> 16) << 8), 0x2200);
 
 	/* 3. set memory low address bits 23:8 */
-	index_addr(denali, mode | ((addr & 0xffff) << 8), 0x2300);
+	denali_host_write(denali, mode | ((dma_addr & 0xffff) << 8), 0x2300);
 
 	/* 4. interrupt when complete, burst len = 64 bytes */
-	index_addr(denali, mode | 0x14000, 0x2400);
+	denali_host_write(denali, mode | 0x14000, 0x2400);
 }
 
-static void denali_setup_dma(struct denali_nand_info *denali, int op)
+static void denali_setup_dma(struct denali_nand_info *denali,
+			     dma_addr_t dma_addr, int page, int write)
 {
 	if (denali->caps & DENALI_CAP_DMA_64BIT)
-		denali_setup_dma64(denali, op);
+		denali_setup_dma64(denali, dma_addr, page, write);
 	else
-		denali_setup_dma32(denali, op);
+		denali_setup_dma32(denali, dma_addr, page, write);
 }
 
-/*
- * writes a page. user specifies type, and this function handles the
- * configuration details.
- */
-static int write_page(struct mtd_info *mtd, struct nand_chip *chip,
-			const uint8_t *buf, bool raw_xfer)
+static int denali_pio_read(struct denali_nand_info *denali, void *buf,
+			   size_t size, int page, int raw)
 {
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	dma_addr_t addr = denali->buf.dma_buf;
-	size_t size = mtd->writesize + mtd->oobsize;
+	uint32_t addr = DENALI_BANK(denali) | page;
+	uint32_t *buf32 = (uint32_t *)buf;
+	uint32_t irq_status, ecc_err_mask;
+	int i;
+
+	if (denali->caps & DENALI_CAP_HW_ECC_FIXUP)
+		ecc_err_mask = INTR__ECC_UNCOR_ERR;
+	else
+		ecc_err_mask = INTR__ECC_ERR;
+
+	denali_reset_irq(denali);
+
+	iowrite32(DENALI_MAP01 | addr, denali->host + DENALI_HOST_ADDR);
+	for (i = 0; i < size / 4; i++)
+		*buf32++ = ioread32(denali->host + DENALI_HOST_DATA);
+
+	irq_status = denali_wait_for_irq(denali, INTR__PAGE_XFER_INC);
+	if (!(irq_status & INTR__PAGE_XFER_INC))
+		return -EIO;
+
+	if (irq_status & INTR__ERASED_PAGE)
+		memset(buf, 0xff, size);
+
+	return irq_status & ecc_err_mask ? -EBADMSG : 0;
+}
+
+static int denali_pio_write(struct denali_nand_info *denali,
+			    const void *buf, size_t size, int page, int raw)
+{
+	uint32_t addr = DENALI_BANK(denali) | page;
+	const uint32_t *buf32 = (uint32_t *)buf;
 	uint32_t irq_status;
-	uint32_t irq_mask = INTR__DMA_CMD_COMP | INTR__PROGRAM_FAIL;
+	int i;
 
-	/*
-	 * if it is a raw xfer, we want to disable ecc and send the spare area.
-	 * !raw_xfer - enable ecc
-	 * raw_xfer - transfer spare
-	 */
-	setup_ecc_for_xfer(denali, !raw_xfer, raw_xfer);
+	denali_reset_irq(denali);
 
-	/* copy buffer into DMA buffer */
-	memcpy(denali->buf.buf, buf, mtd->writesize);
+	iowrite32(DENALI_MAP01 | addr, denali->host + DENALI_HOST_ADDR);
+	for (i = 0; i < size / 4; i++)
+		iowrite32(*buf32++, denali->host + DENALI_HOST_DATA);
 
-	if (raw_xfer) {
-		/* transfer the data to the spare area */
-		memcpy(denali->buf.buf + mtd->writesize,
-			chip->oob_poi,
-			mtd->oobsize);
-	}
-
-	dma_sync_single_for_device(denali->dev, addr, size, DMA_TO_DEVICE);
-
-	clear_interrupts(denali);
-	denali_enable_dma(denali, true);
-
-	denali_setup_dma(denali, DENALI_WRITE);
-
-	/* wait for operation to complete */
-	irq_status = wait_for_irq(denali, irq_mask);
-
-	if (irq_status == 0) {
-		dev_err(denali->dev, "timeout on write_page (type = %d)\n",
-			raw_xfer);
-		denali->status = NAND_STATUS_FAIL;
-	}
-
-	denali_enable_dma(denali, false);
-	dma_sync_single_for_cpu(denali->dev, addr, size, DMA_TO_DEVICE);
+	irq_status = denali_wait_for_irq(denali,
+				INTR__PROGRAM_COMP | INTR__PROGRAM_FAIL);
+	if (!(irq_status & INTR__PROGRAM_COMP))
+		return -EIO;
 
 	return 0;
 }
 
-/* NAND core entry points */
-
-/*
- * this is the callback that the NAND core calls to write a page. Since
- * writing a page with ECC or without is similar, all the work is done
- * by write_page above.
- */
-static int denali_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-				const uint8_t *buf, int oob_required, int page)
+static int denali_pio_xfer(struct denali_nand_info *denali, void *buf,
+			   size_t size, int page, int raw, int write)
 {
-	/*
-	 * for regular page writes, we let HW handle all the ECC
-	 * data written to the device.
-	 */
-	return write_page(mtd, chip, buf, false);
+	if (write)
+		return denali_pio_write(denali, buf, size, page, raw);
+	else
+		return denali_pio_read(denali, buf, size, page, raw);
 }
 
-/*
- * This is the callback that the NAND core calls to write a page without ECC.
- * raw access is similar to ECC page writes, so all the work is done in the
- * write_page() function above.
- */
-static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				 const uint8_t *buf, int oob_required,
-				 int page)
+static int denali_dma_xfer(struct denali_nand_info *denali, void *buf,
+			   size_t size, int page, int raw, int write)
 {
-	/*
-	 * for raw page writes, we want to disable ECC and simply write
-	 * whatever data is in the buffer.
-	 */
-	return write_page(mtd, chip, buf, true);
+	dma_addr_t dma_addr;
+	uint32_t irq_mask, irq_status, ecc_err_mask;
+	enum dma_data_direction dir = write ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+	int ret = 0;
+
+	dma_addr = dma_map_single(denali->dev, buf, size, dir);
+	if (dma_mapping_error(denali->dev, dma_addr)) {
+		dev_dbg(denali->dev, "Failed to DMA-map buffer. Trying PIO.\n");
+		return denali_pio_xfer(denali, buf, size, page, raw, write);
+	}
+
+	if (write) {
+		/*
+		 * INTR__PROGRAM_COMP is never asserted for the DMA transfer.
+		 * We can use INTR__DMA_CMD_COMP instead.  This flag is asserted
+		 * when the page program is completed.
+		 */
+		irq_mask = INTR__DMA_CMD_COMP | INTR__PROGRAM_FAIL;
+		ecc_err_mask = 0;
+	} else if (denali->caps & DENALI_CAP_HW_ECC_FIXUP) {
+		irq_mask = INTR__DMA_CMD_COMP;
+		ecc_err_mask = INTR__ECC_UNCOR_ERR;
+	} else {
+		irq_mask = INTR__DMA_CMD_COMP;
+		ecc_err_mask = INTR__ECC_ERR;
+	}
+
+	denali_enable_dma(denali, true);
+
+	denali_reset_irq(denali);
+	denali_setup_dma(denali, dma_addr, page, write);
+
+	/* wait for operation to complete */
+	irq_status = denali_wait_for_irq(denali, irq_mask);
+	if (!(irq_status & INTR__DMA_CMD_COMP))
+		ret = -EIO;
+	else if (irq_status & ecc_err_mask)
+		ret = -EBADMSG;
+
+	denali_enable_dma(denali, false);
+	dma_unmap_single(denali->dev, dma_addr, size, dir);
+
+	if (irq_status & INTR__ERASED_PAGE)
+		memset(buf, 0xff, size);
+
+	return ret;
 }
 
-static int denali_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			    int page)
+static int denali_data_xfer(struct denali_nand_info *denali, void *buf,
+			    size_t size, int page, int raw, int write)
 {
-	return write_oob_data(mtd, chip->oob_poi, page);
+	setup_ecc_for_xfer(denali, !raw, raw);
+
+	if (denali->dma_avail)
+		return denali_dma_xfer(denali, buf, size, page, raw, write);
+	else
+		return denali_pio_xfer(denali, buf, size, page, raw, write);
+}
+
+/*
+ * Transfer the OOB data of @page between the chip and chip->oob_poi.
+ *
+ * @write selects the direction: NAND_CMD_SEQIN / NAND_CMD_RNDIN with
+ * chip->write_buf when non-zero, NAND_CMD_READ0 / NAND_CMD_RNDOUT with
+ * chip->read_buf otherwise.  No program command is issued here.
+ *
+ * Three regions are moved, in this order, into or out of consecutive
+ * positions of chip->oob_poi: the first oob_skip_bytes bytes at column
+ * writesize, ecc.bytes for each of the ecc.steps steps, and finally the
+ * remaining bytes up to oobsize.
+ */
+static void denali_oob_xfer(struct mtd_info *mtd, struct nand_chip *chip,
+			    int page, int write)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	unsigned int start_cmd = write ? NAND_CMD_SEQIN : NAND_CMD_READ0;
+	unsigned int rnd_cmd = write ? NAND_CMD_RNDIN : NAND_CMD_RNDOUT;
+	int writesize = mtd->writesize;
+	int oobsize = mtd->oobsize;
+	uint8_t *bufpoi = chip->oob_poi;
+	int ecc_steps = chip->ecc.steps;
+	int ecc_size = chip->ecc.size;
+	int ecc_bytes = chip->ecc.bytes;
+	int oob_skip = denali->oob_skip_bytes;
+	size_t size = writesize + oobsize;
+	int i, pos, len;
+
+	/* BBM at the beginning of the OOB area */
+	chip->cmdfunc(mtd, start_cmd, writesize, page);
+	if (write)
+		chip->write_buf(mtd, bufpoi, oob_skip);
+	else
+		chip->read_buf(mtd, bufpoi, oob_skip);
+	bufpoi += oob_skip;
+
+	/* OOB ECC */
+	for (i = 0; i < ecc_steps; i++) {
+		/* ECC of step i sits right after that step's ecc_size bytes */
+		pos = ecc_size + i * (ecc_size + ecc_bytes);
+		len = ecc_bytes;
+
+		/*
+		 * A chunk entirely past writesize is shifted by the skipped
+		 * bytes; a chunk crossing writesize is cut at that boundary.
+		 */
+		if (pos >= writesize)
+			pos += oob_skip;
+		else if (pos + len > writesize)
+			len = writesize - pos;
+
+		chip->cmdfunc(mtd, rnd_cmd, pos, -1);
+		if (write)
+			chip->write_buf(mtd, bufpoi, len);
+		else
+			chip->read_buf(mtd, bufpoi, len);
+		bufpoi += len;
+		/* the rest of a cut chunk resumes after the skipped bytes */
+		if (len < ecc_bytes) {
+			len = ecc_bytes - len;
+			chip->cmdfunc(mtd, rnd_cmd, writesize + oob_skip, -1);
+			if (write)
+				chip->write_buf(mtd, bufpoi, len);
+			else
+				chip->read_buf(mtd, bufpoi, len);
+			bufpoi += len;
+		}
+	}
+
+	/* OOB free */
+	/* whatever is left of oob_poi maps to the last bytes of the page */
+	len = oobsize - (bufpoi - chip->oob_poi);
+	chip->cmdfunc(mtd, rnd_cmd, size - len, -1);
+	if (write)
+		chip->write_buf(mtd, bufpoi, len);
+	else
+		chip->read_buf(mtd, bufpoi, len);
+}
+
+/*
+ * Read @page in raw mode and de-interleave it for the caller.
+ *
+ * writesize + oobsize bytes are first transferred into denali->buf with
+ * raw = 1 (denali_data_xfer() then disables ECC and enables the spare
+ * transfer).  If @buf is non-NULL, the ecc.size payload bytes of every ECC
+ * step are copied to it; if @oob_required is non-zero, chip->oob_poi is
+ * filled with the first oob_skip_bytes bytes found at offset writesize, the
+ * ecc.bytes of every step, and then the remaining tail of the buffer.
+ *
+ * Returns 0 on success or the error returned by denali_data_xfer().
+ */
+static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+				uint8_t *buf, int oob_required, int page)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	int writesize = mtd->writesize;
+	int oobsize = mtd->oobsize;
+	int ecc_steps = chip->ecc.steps;
+	int ecc_size = chip->ecc.size;
+	int ecc_bytes = chip->ecc.bytes;
+	void *dma_buf = denali->buf;
+	int oob_skip = denali->oob_skip_bytes;
+	size_t size = writesize + oobsize;
+	int ret, i, pos, len;
+
+	ret = denali_data_xfer(denali, dma_buf, size, page, 1, 0);
+	if (ret)
+		return ret;
+
+	/* Arrange the buffer for syndrome payload/ecc layout */
+	if (buf) {
+		for (i = 0; i < ecc_steps; i++) {
+			/* payload of step i starts each payload+ECC unit */
+			pos = i * (ecc_size + ecc_bytes);
+			len = ecc_size;
+
+			/*
+			 * Past writesize, shift by the skipped bytes; when
+			 * crossing writesize, cut the chunk at the boundary.
+			 */
+			if (pos >= writesize)
+				pos += oob_skip;
+			else if (pos + len > writesize)
+				len = writesize - pos;
+
+			memcpy(buf, dma_buf + pos, len);
+			buf += len;
+			/* rest of a cut chunk resumes after the skipped bytes */
+			if (len < ecc_size) {
+				len = ecc_size - len;
+				memcpy(buf, dma_buf + writesize + oob_skip,
+				       len);
+				buf += len;
+			}
+		}
+	}
+
+	if (oob_required) {
+		uint8_t *oob = chip->oob_poi;
+
+		/* BBM at the beginning of the OOB area */
+		memcpy(oob, dma_buf + writesize, oob_skip);
+		oob += oob_skip;
+
+		/* OOB ECC */
+		for (i = 0; i < ecc_steps; i++) {
+			/* ECC of step i follows that step's payload */
+			pos = ecc_size + i * (ecc_size + ecc_bytes);
+			len = ecc_bytes;
+
+			if (pos >= writesize)
+				pos += oob_skip;
+			else if (pos + len > writesize)
+				len = writesize - pos;
+
+			memcpy(oob, dma_buf + pos, len);
+			oob += len;
+			if (len < ecc_bytes) {
+				len = ecc_bytes - len;
+				memcpy(oob, dma_buf + writesize + oob_skip,
+				       len);
+				oob += len;
+			}
+		}
+
+		/* OOB free */
+		/* the rest of oob_poi comes from the tail of the buffer */
+		len = oobsize - (oob - chip->oob_poi);
+		memcpy(oob, dma_buf + size - len, len);
+	}
+
+	return 0;
 }
 
 static int denali_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
 			   int page)
 {
-	read_oob_data(mtd, chip->oob_poi, page);
+	denali_oob_xfer(mtd, chip, page, 0);
 
 	return 0;
 }
 
+/*
+ * Write chip->oob_poi to the OOB of @page.
+ *
+ * The saved IRQ state is cleared, the OOB bytes are sent with
+ * denali_oob_xfer(), then NAND_CMD_PAGEPROG is issued and chip->waitfunc is
+ * called for the resulting status.
+ *
+ * Returns -EIO if NAND_STATUS_FAIL is set in that status, 0 otherwise.
+ */
+static int denali_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
+			    int page)
+{
+	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	int status;
+
+	denali_reset_irq(denali);
+
+	denali_oob_xfer(mtd, chip, page, 1);
+
+	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+	status = chip->waitfunc(mtd, chip);
+
+	return status & NAND_STATUS_FAIL ? -EIO : 0;
+}
+
 static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 			    uint8_t *buf, int oob_required, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	dma_addr_t addr = denali->buf.dma_buf;
-	size_t size = mtd->writesize + mtd->oobsize;
-	uint32_t irq_status;
-	uint32_t irq_mask = denali->caps & DENALI_CAP_HW_ECC_FIXUP ?
-				INTR__DMA_CMD_COMP | INTR__ECC_UNCOR_ERR :
-				INTR__ECC_TRANSACTION_DONE | INTR__ECC_ERR;
 	unsigned long uncor_ecc_flags = 0;
 	int stat = 0;
+	int ret;
 
-	if (page != denali->page) {
-		dev_err(denali->dev,
-			"IN %s: page %d is not equal to denali->page %d",
-			__func__, page, denali->page);
-		BUG();
-	}
-
-	setup_ecc_for_xfer(denali, true, false);
-
-	denali_enable_dma(denali, true);
-	dma_sync_single_for_device(denali->dev, addr, size, DMA_FROM_DEVICE);
-
-	clear_interrupts(denali);
-	denali_setup_dma(denali, DENALI_READ);
-
-	/* wait for operation to complete */
-	irq_status = wait_for_irq(denali, irq_mask);
-
-	dma_sync_single_for_cpu(denali->dev, addr, size, DMA_FROM_DEVICE);
-
-	memcpy(buf, denali->buf.buf, mtd->writesize);
+	ret = denali_data_xfer(denali, buf, mtd->writesize, page, 0, 0);
+	if (ret && ret != -EBADMSG)
+		return ret;
 
 	if (denali->caps & DENALI_CAP_HW_ECC_FIXUP)
 		stat = denali_hw_ecc_fixup(mtd, denali, &uncor_ecc_flags);
-	else if (irq_status & INTR__ECC_ERR)
+	else if (ret == -EBADMSG)
 		stat = denali_sw_ecc_fixup(mtd, denali, &uncor_ecc_flags, buf);
-	denali_enable_dma(denali, false);
 
 	if (stat < 0)
 		return stat;
 
 	if (uncor_ecc_flags) {
-		read_oob_data(mtd, chip->oob_poi, denali->page);
+		ret = denali_read_oob(mtd, chip, page);
+		if (ret)
+			return ret;
 
 		stat = denali_check_erased_page(mtd, chip, buf,
 						uncor_ecc_flags, stat);
@@ -1181,137 +858,266 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	return stat;
 }
 
-static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int oob_required, int page)
+static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+				 const uint8_t *buf, int oob_required, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	dma_addr_t addr = denali->buf.dma_buf;
-	size_t size = mtd->writesize + mtd->oobsize;
-	uint32_t irq_mask = INTR__DMA_CMD_COMP;
+	int writesize = mtd->writesize;
+	int oobsize = mtd->oobsize;
+	int ecc_steps = chip->ecc.steps;
+	int ecc_size = chip->ecc.size;
+	int ecc_bytes = chip->ecc.bytes;
+	void *dma_buf = denali->buf;
+	int oob_skip = denali->oob_skip_bytes;
+	size_t size = writesize + oobsize;
+	int i, pos, len;
 
-	if (page != denali->page) {
-		dev_err(denali->dev,
-			"IN %s: page %d is not equal to denali->page %d",
-			__func__, page, denali->page);
-		BUG();
+	/*
+	 * Fill the buffer with 0xff first, unless this is a full page transfer
+	 * (both buf and OOB are supplied).  This simplifies the logic.
+	 */
+	if (!buf || !oob_required)
+		memset(dma_buf, 0xff, size);
+
+	/* Arrange the buffer for syndrome payload/ecc layout */
+	if (buf) {
+		for (i = 0; i < ecc_steps; i++) {
+			pos = i * (ecc_size + ecc_bytes);
+			len = ecc_size;
+
+			if (pos >= writesize)
+				pos += oob_skip;
+			else if (pos + len > writesize)
+				len = writesize - pos;
+
+			memcpy(dma_buf + pos, buf, len);
+			buf += len;
+			if (len < ecc_size) {
+				len = ecc_size - len;
+				memcpy(dma_buf + writesize + oob_skip, buf,
+				       len);
+				buf += len;
+			}
+		}
 	}
 
-	setup_ecc_for_xfer(denali, false, true);
-	denali_enable_dma(denali, true);
+	if (oob_required) {
+		const uint8_t *oob = chip->oob_poi;
 
-	dma_sync_single_for_device(denali->dev, addr, size, DMA_FROM_DEVICE);
+		/* BBM at the beginning of the OOB area */
+		memcpy(dma_buf + writesize, oob, oob_skip);
+		oob += oob_skip;
 
-	clear_interrupts(denali);
-	denali_setup_dma(denali, DENALI_READ);
+		/* OOB ECC */
+		for (i = 0; i < ecc_steps; i++) {
+			pos = ecc_size + i * (ecc_size + ecc_bytes);
+			len = ecc_bytes;
 
-	/* wait for operation to complete */
-	wait_for_irq(denali, irq_mask);
+			if (pos >= writesize)
+				pos += oob_skip;
+			else if (pos + len > writesize)
+				len = writesize - pos;
 
-	dma_sync_single_for_cpu(denali->dev, addr, size, DMA_FROM_DEVICE);
+			memcpy(dma_buf + pos, oob, len);
+			oob += len;
+			if (len < ecc_bytes) {
+				len = ecc_bytes - len;
+				memcpy(dma_buf + writesize + oob_skip, oob,
+				       len);
+				oob += len;
+			}
+		}
 
-	denali_enable_dma(denali, false);
+		/* OOB free */
+		len = oobsize - (oob - chip->oob_poi);
+		memcpy(dma_buf + size - len, oob, len);
+	}
 
-	memcpy(buf, denali->buf.buf, mtd->writesize);
-	memcpy(chip->oob_poi, denali->buf.buf + mtd->writesize, mtd->oobsize);
-
-	return 0;
+	return denali_data_xfer(denali, dma_buf, size, page, 1, 1);
 }
 
-static uint8_t denali_read_byte(struct mtd_info *mtd)
+static int denali_write_page(struct mtd_info *mtd, struct nand_chip *chip,
+			     const uint8_t *buf, int oob_required, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	uint8_t result = 0xff;
 
-	if (denali->buf.head < denali->buf.tail)
-		result = denali->buf.buf[denali->buf.head++];
-
-	return result;
+	return denali_data_xfer(denali, (void *)buf, mtd->writesize,
+				page, 0, 1);
 }
 
 static void denali_select_chip(struct mtd_info *mtd, int chip)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 
-	spin_lock_irq(&denali->irq_lock);
-	denali->flash_bank = chip;
-	spin_unlock_irq(&denali->irq_lock);
+	denali->active_bank = chip;
 }
 
 static int denali_waitfunc(struct mtd_info *mtd, struct nand_chip *chip)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	int status = denali->status;
+	uint32_t irq_status;
 
-	denali->status = 0;
+	/* R/B# pin transitioned from low to high? */
+	irq_status = denali_wait_for_irq(denali, INTR__INT_ACT);
 
-	return status;
+	return irq_status & INTR__INT_ACT ? 0 : NAND_STATUS_FAIL;
 }
 
 static int denali_erase(struct mtd_info *mtd, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	uint32_t irq_status;
 
-	uint32_t cmd, irq_status;
+	denali_reset_irq(denali);
 
-	clear_interrupts(denali);
-
-	/* setup page read request for access type */
-	cmd = MODE_10 | BANK(denali->flash_bank) | page;
-	index_addr(denali, cmd, 0x1);
+	denali_host_write(denali, DENALI_MAP10 | DENALI_BANK(denali) | page,
+			  DENALI_ERASE);
 
 	/* wait for erase to complete or failure to occur */
-	irq_status = wait_for_irq(denali, INTR__ERASE_COMP | INTR__ERASE_FAIL);
+	irq_status = denali_wait_for_irq(denali,
+					 INTR__ERASE_COMP | INTR__ERASE_FAIL);
 
-	return irq_status & INTR__ERASE_FAIL ? NAND_STATUS_FAIL : PASS;
+	return irq_status & INTR__ERASE_COMP ? 0 : NAND_STATUS_FAIL;
 }
 
-static void denali_cmdfunc(struct mtd_info *mtd, unsigned int cmd, int col,
-			   int page)
+#define DIV_ROUND_DOWN_ULL(ll, d) \
+	({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; })
+
+static int denali_setup_data_interface(struct mtd_info *mtd, int chipnr,
+				       const struct nand_data_interface *conf)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
-	uint32_t addr, id;
+	const struct nand_sdr_timings *timings;
+	unsigned long t_clk;
+	int acc_clks, re_2_we, re_2_re, we_2_re, addr_2_data;
+	int rdwr_en_lo, rdwr_en_hi, rdwr_en_lo_hi, cs_setup;
+	int addr_2_data_mask;
+	uint32_t tmp;
+
+	timings = nand_get_sdr_timings(conf);
+	if (IS_ERR(timings))
+		return PTR_ERR(timings);
+
+	/* clk_x period in picoseconds */
+	t_clk = DIV_ROUND_DOWN_ULL(1000000000000ULL, denali->clk_x_rate);
+	if (!t_clk)
+		return -EINVAL;
+
+	if (chipnr == NAND_DATA_IFACE_CHECK_ONLY)
+		return 0;
+
+	/* tREA -> ACC_CLKS */
+	acc_clks = DIV_ROUND_UP(timings->tREA_max, t_clk);
+	acc_clks = min_t(int, acc_clks, ACC_CLKS__VALUE);
+
+	tmp = ioread32(denali->reg + ACC_CLKS);
+	tmp &= ~ACC_CLKS__VALUE;
+	tmp |= acc_clks;
+	iowrite32(tmp, denali->reg + ACC_CLKS);
+
+	/* tRHW -> RE_2_WE */
+	re_2_we = DIV_ROUND_UP(timings->tRHW_min, t_clk);
+	re_2_we = min_t(int, re_2_we, RE_2_WE__VALUE);
+
+	tmp = ioread32(denali->reg + RE_2_WE);
+	tmp &= ~RE_2_WE__VALUE;
+	tmp |= re_2_we;
+	iowrite32(tmp, denali->reg + RE_2_WE);
+
+	/* tRHZ -> RE_2_RE */
+	re_2_re = DIV_ROUND_UP(timings->tRHZ_max, t_clk);
+	re_2_re = min_t(int, re_2_re, RE_2_RE__VALUE);
+
+	tmp = ioread32(denali->reg + RE_2_RE);
+	tmp &= ~RE_2_RE__VALUE;
+	tmp |= re_2_re;
+	iowrite32(tmp, denali->reg + RE_2_RE);
+
+	/* tWHR -> WE_2_RE */
+	we_2_re = DIV_ROUND_UP(timings->tWHR_min, t_clk);
+	we_2_re = min_t(int, we_2_re, TWHR2_AND_WE_2_RE__WE_2_RE);
+
+	tmp = ioread32(denali->reg + TWHR2_AND_WE_2_RE);
+	tmp &= ~TWHR2_AND_WE_2_RE__WE_2_RE;
+	tmp |= we_2_re;
+	iowrite32(tmp, denali->reg + TWHR2_AND_WE_2_RE);
+
+	/* tADL -> ADDR_2_DATA */
+
+	/* for older versions, ADDR_2_DATA is only 6 bits wide */
+	addr_2_data_mask = TCWAW_AND_ADDR_2_DATA__ADDR_2_DATA;
+	if (denali->revision < 0x0501)
+		addr_2_data_mask >>= 1;
+
+	addr_2_data = DIV_ROUND_UP(timings->tADL_min, t_clk);
+	addr_2_data = min_t(int, addr_2_data, addr_2_data_mask);
+
+	tmp = ioread32(denali->reg + TCWAW_AND_ADDR_2_DATA);
+	tmp &= ~addr_2_data_mask;
+	tmp |= addr_2_data;
+	iowrite32(tmp, denali->reg + TCWAW_AND_ADDR_2_DATA);
+
+	/* tREH, tWH -> RDWR_EN_HI_CNT */
+	rdwr_en_hi = DIV_ROUND_UP(max(timings->tREH_min, timings->tWH_min),
+				  t_clk);
+	rdwr_en_hi = min_t(int, rdwr_en_hi, RDWR_EN_HI_CNT__VALUE);
+
+	tmp = ioread32(denali->reg + RDWR_EN_HI_CNT);
+	tmp &= ~RDWR_EN_HI_CNT__VALUE;
+	tmp |= rdwr_en_hi;
+	iowrite32(tmp, denali->reg + RDWR_EN_HI_CNT);
+
+	/* tRP, tWP -> RDWR_EN_LO_CNT */
+	rdwr_en_lo = DIV_ROUND_UP(max(timings->tRP_min, timings->tWP_min),
+				  t_clk);
+	rdwr_en_lo_hi = DIV_ROUND_UP(max(timings->tRC_min, timings->tWC_min),
+				     t_clk);
+	rdwr_en_lo_hi = max(rdwr_en_lo_hi, DENALI_CLK_X_MULT);
+	rdwr_en_lo = max(rdwr_en_lo, rdwr_en_lo_hi - rdwr_en_hi);
+	rdwr_en_lo = min_t(int, rdwr_en_lo, RDWR_EN_LO_CNT__VALUE);
+
+	tmp = ioread32(denali->reg + RDWR_EN_LO_CNT);
+	tmp &= ~RDWR_EN_LO_CNT__VALUE;
+	tmp |= rdwr_en_lo;
+	iowrite32(tmp, denali->reg + RDWR_EN_LO_CNT);
+
+	/* tCS, tCEA -> CS_SETUP_CNT */
+	cs_setup = max3((int)DIV_ROUND_UP(timings->tCS_min, t_clk) - rdwr_en_lo,
+			(int)DIV_ROUND_UP(timings->tCEA_max, t_clk) - acc_clks,
+			0);
+	cs_setup = min_t(int, cs_setup, CS_SETUP_CNT__VALUE);
+
+	tmp = ioread32(denali->reg + CS_SETUP_CNT);
+	tmp &= ~CS_SETUP_CNT__VALUE;
+	tmp |= cs_setup;
+	iowrite32(tmp, denali->reg + CS_SETUP_CNT);
+
+	return 0;
+}
+
+static void denali_reset_banks(struct denali_nand_info *denali)
+{
+	u32 irq_status;
 	int i;
 
-	switch (cmd) {
-	case NAND_CMD_PAGEPROG:
-		break;
-	case NAND_CMD_STATUS:
-		read_status(denali);
-		break;
-	case NAND_CMD_READID:
-	case NAND_CMD_PARAM:
-		reset_buf(denali);
-		/*
-		 * sometimes ManufactureId read from register is not right
-		 * e.g. some of Micron MT29F32G08QAA MLC NAND chips
-		 * So here we send READID cmd to NAND insteand
-		 */
-		addr = MODE_11 | BANK(denali->flash_bank);
-		index_addr(denali, addr | 0, 0x90);
-		index_addr(denali, addr | 1, col);
-		for (i = 0; i < 8; i++) {
-			index_addr_read_data(denali, addr | 2, &id);
-			write_byte_to_buf(denali, id);
-		}
-		break;
-	case NAND_CMD_READ0:
-	case NAND_CMD_SEQIN:
-		denali->page = page;
-		break;
-	case NAND_CMD_RESET:
-		reset_bank(denali);
-		break;
-	case NAND_CMD_READOOB:
-		/* TODO: Read OOB data */
-		break;
-	default:
-		pr_err(": unsupported command received 0x%x\n", cmd);
-		break;
-	}
-}
-/* end NAND core entry points */
+	for (i = 0; i < denali->max_banks; i++) {
+		denali->active_bank = i;
 
-/* Initialization code to bring the device up to a known good state */
+		denali_reset_irq(denali);
+
+		iowrite32(DEVICE_RESET__BANK(i),
+			  denali->reg + DEVICE_RESET);
+
+		irq_status = denali_wait_for_irq(denali,
+			INTR__RST_COMP | INTR__INT_ACT | INTR__TIME_OUT);
+		if (!(irq_status & INTR__INT_ACT))
+			break;
+	}
+
+	dev_dbg(denali->dev, "%d chips connected\n", i);
+	denali->max_banks = i;
+}
+
 static void denali_hw_init(struct denali_nand_info *denali)
 {
 	/*
@@ -1319,8 +1125,7 @@ static void denali_hw_init(struct denali_nand_info *denali)
 	 * override it.
 	 */
 	if (!denali->revision)
-		denali->revision =
-				swab16(ioread32(denali->flash_reg + REVISION));
+		denali->revision = swab16(ioread32(denali->reg + REVISION));
 
 	/*
 	 * tell driver how many bit controller will skip before
@@ -1328,30 +1133,51 @@ static void denali_hw_init(struct denali_nand_info *denali)
 	 * set by firmware. So we read this value out.
 	 * if this value is 0, just let it be.
 	 */
-	denali->bbtskipbytes = ioread32(denali->flash_reg +
-						SPARE_AREA_SKIP_BYTES);
+	denali->oob_skip_bytes = ioread32(denali->reg + SPARE_AREA_SKIP_BYTES);
 	detect_max_banks(denali);
-	denali_nand_reset(denali);
-	iowrite32(0x0F, denali->flash_reg + RB_PIN_ENABLED);
-	iowrite32(CHIP_EN_DONT_CARE__FLAG,
-			denali->flash_reg + CHIP_ENABLE_DONT_CARE);
+	iowrite32(0x0F, denali->reg + RB_PIN_ENABLED);
+	iowrite32(CHIP_EN_DONT_CARE__FLAG, denali->reg + CHIP_ENABLE_DONT_CARE);
 
-	iowrite32(0xffff, denali->flash_reg + SPARE_AREA_MARKER);
+	iowrite32(0xffff, denali->reg + SPARE_AREA_MARKER);
 
 	/* Should set value for these registers when init */
-	iowrite32(0, denali->flash_reg + TWO_ROW_ADDR_CYCLES);
-	iowrite32(1, denali->flash_reg + ECC_ENABLE);
-	denali_nand_timing_set(denali);
-	denali_irq_init(denali);
+	iowrite32(0, denali->reg + TWO_ROW_ADDR_CYCLES);
+	iowrite32(1, denali->reg + ECC_ENABLE);
 }
 
-/*
- * Althogh controller spec said SLC ECC is forceb to be 4bit,
- * but denali controller in MRST only support 15bit and 8bit ECC
- * correction
- */
-#define ECC_8BITS	14
-#define ECC_15BITS	26
+int denali_calc_ecc_bytes(int step_size, int strength)
+{
+	/* BCH code.  Denali requires ecc.bytes to be a multiple of 2 */
+	return DIV_ROUND_UP(strength * fls(step_size * 8), 16) * 2;
+}
+EXPORT_SYMBOL(denali_calc_ecc_bytes);
+
+static int denali_ecc_setup(struct mtd_info *mtd, struct nand_chip *chip,
+			    struct denali_nand_info *denali)
+{
+	int oobavail = mtd->oobsize - denali->oob_skip_bytes;
+	int ret;
+
+	/*
+	 * If .size and .strength are already set (usually by DT),
+	 * check if they are supported by this controller.
+	 */
+	if (chip->ecc.size && chip->ecc.strength)
+		return nand_check_ecc_caps(chip, denali->ecc_caps, oobavail);
+
+	/*
+	 * We want .size and .strength closest to the chip's requirement
+	 * unless NAND_ECC_MAXIMIZE is requested.
+	 */
+	if (!(chip->ecc.options & NAND_ECC_MAXIMIZE)) {
+		ret = nand_match_ecc_req(chip, denali->ecc_caps, oobavail);
+		if (!ret)
+			return 0;
+	}
+
+	/* Max ECC strength is the last thing we can do */
+	return nand_maximize_ecc(chip, denali->ecc_caps, oobavail);
+}
 
 static int denali_ooblayout_ecc(struct mtd_info *mtd, int section,
 				struct mtd_oob_region *oobregion)
@@ -1362,7 +1188,7 @@ static int denali_ooblayout_ecc(struct mtd_info *mtd, int section,
 	if (section)
 		return -ERANGE;
 
-	oobregion->offset = denali->bbtskipbytes;
+	oobregion->offset = denali->oob_skip_bytes;
 	oobregion->length = chip->ecc.total;
 
 	return 0;
@@ -1377,7 +1203,7 @@ static int denali_ooblayout_free(struct mtd_info *mtd, int section,
 	if (section)
 		return -ERANGE;
 
-	oobregion->offset = chip->ecc.total + denali->bbtskipbytes;
+	oobregion->offset = chip->ecc.total + denali->oob_skip_bytes;
 	oobregion->length = mtd->oobsize - oobregion->offset;
 
 	return 0;
@@ -1388,29 +1214,6 @@ static const struct mtd_ooblayout_ops denali_ooblayout_ops = {
 	.free = denali_ooblayout_free,
 };
 
-static uint8_t bbt_pattern[] = {'B', 'b', 't', '0' };
-static uint8_t mirror_pattern[] = {'1', 't', 'b', 'B' };
-
-static struct nand_bbt_descr bbt_main_descr = {
-	.options = NAND_BBT_LASTBLOCK | NAND_BBT_CREATE | NAND_BBT_WRITE
-		| NAND_BBT_2BIT | NAND_BBT_VERSION | NAND_BBT_PERCHIP,
-	.offs =	8,
-	.len = 4,
-	.veroffs = 12,
-	.maxblocks = 4,
-	.pattern = bbt_pattern,
-};
-
-static struct nand_bbt_descr bbt_mirror_descr = {
-	.options = NAND_BBT_LASTBLOCK | NAND_BBT_CREATE | NAND_BBT_WRITE
-		| NAND_BBT_2BIT | NAND_BBT_VERSION | NAND_BBT_PERCHIP,
-	.offs =	8,
-	.len = 4,
-	.veroffs = 12,
-	.maxblocks = 4,
-	.pattern = mirror_pattern,
-};
-
 /* initialize driver data structures */
 static void denali_drv_init(struct denali_nand_info *denali)
 {
@@ -1425,12 +1228,6 @@ static void denali_drv_init(struct denali_nand_info *denali)
 	 * element that might be access shared data (interrupt status)
 	 */
 	spin_lock_init(&denali->irq_lock);
-
-	/* indicate that MTD has not selected a valid bank yet */
-	denali->flash_bank = CHIP_SELECT_INVALID;
-
-	/* initialize our irq_status variable to indicate no interrupts */
-	denali->irq_status = 0;
 }
 
 static int denali_multidev_fixup(struct denali_nand_info *denali)
@@ -1445,23 +1242,23 @@ static int denali_multidev_fixup(struct denali_nand_info *denali)
 	 * In this case, the core framework knows nothing about this fact,
 	 * so we should tell it the _logical_ pagesize and anything necessary.
 	 */
-	denali->devnum = ioread32(denali->flash_reg + DEVICES_CONNECTED);
+	denali->devs_per_cs = ioread32(denali->reg + DEVICES_CONNECTED);
 
 	/*
 	 * On some SoCs, DEVICES_CONNECTED is not auto-detected.
 	 * For those, DEVICES_CONNECTED is left to 0.  Set 1 if it is the case.
 	 */
-	if (denali->devnum == 0) {
-		denali->devnum = 1;
-		iowrite32(1, denali->flash_reg + DEVICES_CONNECTED);
+	if (denali->devs_per_cs == 0) {
+		denali->devs_per_cs = 1;
+		iowrite32(1, denali->reg + DEVICES_CONNECTED);
 	}
 
-	if (denali->devnum == 1)
+	if (denali->devs_per_cs == 1)
 		return 0;
 
-	if (denali->devnum != 2) {
+	if (denali->devs_per_cs != 2) {
 		dev_err(denali->dev, "unsupported number of devices %d\n",
-			denali->devnum);
+			denali->devs_per_cs);
 		return -EINVAL;
 	}
 
@@ -1479,7 +1276,7 @@ static int denali_multidev_fixup(struct denali_nand_info *denali)
 	chip->ecc.size <<= 1;
 	chip->ecc.bytes <<= 1;
 	chip->ecc.strength <<= 1;
-	denali->bbtskipbytes <<= 1;
+	denali->oob_skip_bytes <<= 1;
 
 	return 0;
 }
@@ -1490,27 +1287,12 @@ int denali_init(struct denali_nand_info *denali)
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 
-	if (denali->platform == INTEL_CE4100) {
-		/*
-		 * Due to a silicon limitation, we can only support
-		 * ONFI timing mode 1 and below.
-		 */
-		if (onfi_timing_mode < -1 || onfi_timing_mode > 1) {
-			pr_err("Intel CE4100 only supports ONFI timing mode 1 or below\n");
-			return -EINVAL;
-		}
-	}
-
-	/* allocate a temporary buffer for nand_scan_ident() */
-	denali->buf.buf = devm_kzalloc(denali->dev, PAGE_SIZE,
-					GFP_DMA | GFP_KERNEL);
-	if (!denali->buf.buf)
-		return -ENOMEM;
-
 	mtd->dev.parent = denali->dev;
 	denali_hw_init(denali);
 	denali_drv_init(denali);
 
+	denali_clear_irq_all(denali);
+
 	/* Request IRQ after all the hardware initialization is finished */
 	ret = devm_request_irq(denali->dev, denali->irq, denali_isr,
 			       IRQF_SHARED, DENALI_NAND_NAME, denali);
@@ -1519,8 +1301,11 @@ int denali_init(struct denali_nand_info *denali)
 		return ret;
 	}
 
-	/* now that our ISR is registered, we can enable interrupts */
-	denali_set_intr_modes(denali, true);
+	denali_enable_irq(denali);
+	denali_reset_banks(denali);
+
+	denali->active_bank = DENALI_INVALID_BANK;
+
 	nand_set_flash_node(chip, denali->dev->of_node);
 	/* Fallback to the default name if DT did not give "label" property */
 	if (!mtd->name)
@@ -1528,10 +1313,17 @@ int denali_init(struct denali_nand_info *denali)
 
 	/* register the driver with the NAND core subsystem */
 	chip->select_chip = denali_select_chip;
-	chip->cmdfunc = denali_cmdfunc;
 	chip->read_byte = denali_read_byte;
+	chip->write_byte = denali_write_byte;
+	chip->read_word = denali_read_word;
+	chip->cmd_ctrl = denali_cmd_ctrl;
+	chip->dev_ready = denali_dev_ready;
 	chip->waitfunc = denali_waitfunc;
 
+	/* clk rate info is needed for setup_data_interface */
+	if (denali->clk_x_rate)
+		chip->setup_data_interface = denali_setup_data_interface;
+
 	/*
 	 * scan for NAND devices attached to the controller
 	 * this is the first stage in a two step process to register
@@ -1539,33 +1331,25 @@ int denali_init(struct denali_nand_info *denali)
 	 */
 	ret = nand_scan_ident(mtd, denali->max_banks, NULL);
 	if (ret)
-		goto failed_req_irq;
+		goto disable_irq;
 
-	/* allocate the right size buffer now */
-	devm_kfree(denali->dev, denali->buf.buf);
-	denali->buf.buf = devm_kzalloc(denali->dev,
-			     mtd->writesize + mtd->oobsize,
-			     GFP_KERNEL);
-	if (!denali->buf.buf) {
-		ret = -ENOMEM;
-		goto failed_req_irq;
+	if (ioread32(denali->reg + FEATURES) & FEATURES__DMA)
+		denali->dma_avail = 1;
+
+	if (denali->dma_avail) {
+		int dma_bit = denali->caps & DENALI_CAP_DMA_64BIT ? 64 : 32;
+
+		ret = dma_set_mask(denali->dev, DMA_BIT_MASK(dma_bit));
+		if (ret) {
+			dev_info(denali->dev,
+				 "Failed to set DMA mask. Disabling DMA.\n");
+			denali->dma_avail = 0;
+		}
 	}
 
-	ret = dma_set_mask(denali->dev,
-			   DMA_BIT_MASK(denali->caps & DENALI_CAP_DMA_64BIT ?
-					64 : 32));
-	if (ret) {
-		dev_err(denali->dev, "No usable DMA configuration\n");
-		goto failed_req_irq;
-	}
-
-	denali->buf.dma_buf = dma_map_single(denali->dev, denali->buf.buf,
-			     mtd->writesize + mtd->oobsize,
-			     DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(denali->dev, denali->buf.dma_buf)) {
-		dev_err(denali->dev, "Failed to map DMA buffer\n");
-		ret = -EIO;
-		goto failed_req_irq;
+	if (denali->dma_avail) {
+		chip->options |= NAND_USE_BOUNCE_BUFFER;
+		chip->buf_align = 16;
 	}
 
 	/*
@@ -1574,46 +1358,49 @@ int denali_init(struct denali_nand_info *denali)
 	 * bad block management.
 	 */
 
-	/* Bad block management */
-	chip->bbt_td = &bbt_main_descr;
-	chip->bbt_md = &bbt_mirror_descr;
-
-	/* skip the scan for now until we have OOB read and write support */
 	chip->bbt_options |= NAND_BBT_USE_FLASH;
-	chip->options |= NAND_SKIP_BBTSCAN;
+	chip->bbt_options |= NAND_BBT_NO_OOB;
+
 	chip->ecc.mode = NAND_ECC_HW_SYNDROME;
 
 	/* no subpage writes on denali */
 	chip->options |= NAND_NO_SUBPAGE_WRITE;
 
-	/*
-	 * Denali Controller only support 15bit and 8bit ECC in MRST,
-	 * so just let controller do 15bit ECC for MLC and 8bit ECC for
-	 * SLC if possible.
-	 * */
-	if (!nand_is_slc(chip) &&
-			(mtd->oobsize > (denali->bbtskipbytes +
-			ECC_15BITS * (mtd->writesize /
-			ECC_SECTOR_SIZE)))) {
-		/* if MLC OOB size is large enough, use 15bit ECC*/
-		chip->ecc.strength = 15;
-		chip->ecc.bytes = ECC_15BITS;
-		iowrite32(15, denali->flash_reg + ECC_CORRECTION);
-	} else if (mtd->oobsize < (denali->bbtskipbytes +
-			ECC_8BITS * (mtd->writesize /
-			ECC_SECTOR_SIZE))) {
-		pr_err("Your NAND chip OOB is not large enough to contain 8bit ECC correction codes");
-		goto failed_req_irq;
-	} else {
-		chip->ecc.strength = 8;
-		chip->ecc.bytes = ECC_8BITS;
-		iowrite32(8, denali->flash_reg + ECC_CORRECTION);
+	ret = denali_ecc_setup(mtd, chip, denali);
+	if (ret) {
+		dev_err(denali->dev, "Failed to setup ECC settings.\n");
+		goto disable_irq;
 	}
 
+	dev_dbg(denali->dev,
+		"chosen ECC settings: step=%d, strength=%d, bytes=%d\n",
+		chip->ecc.size, chip->ecc.strength, chip->ecc.bytes);
+
+	iowrite32(MAKE_ECC_CORRECTION(chip->ecc.strength, 1),
+		  denali->reg + ECC_CORRECTION);
+	iowrite32(mtd->erasesize / mtd->writesize,
+		  denali->reg + PAGES_PER_BLOCK);
+	iowrite32(chip->options & NAND_BUSWIDTH_16 ? 1 : 0,
+		  denali->reg + DEVICE_WIDTH);
+	iowrite32(mtd->writesize, denali->reg + DEVICE_MAIN_AREA_SIZE);
+	iowrite32(mtd->oobsize, denali->reg + DEVICE_SPARE_AREA_SIZE);
+
+	iowrite32(chip->ecc.size, denali->reg + CFG_DATA_BLOCK_SIZE);
+	iowrite32(chip->ecc.size, denali->reg + CFG_LAST_DATA_BLOCK_SIZE);
+	/* chip->ecc.steps is set by nand_scan_tail(); not available here */
+	iowrite32(mtd->writesize / chip->ecc.size,
+		  denali->reg + CFG_NUM_DATA_BLOCKS);
+
 	mtd_set_ooblayout(mtd, &denali_ooblayout_ops);
 
-	/* override the default read operations */
-	chip->ecc.size = ECC_SECTOR_SIZE;
+	if (chip->options & NAND_BUSWIDTH_16) {
+		chip->read_buf = denali_read_buf16;
+		chip->write_buf = denali_write_buf16;
+	} else {
+		chip->read_buf = denali_read_buf;
+		chip->write_buf = denali_write_buf;
+	}
+	chip->ecc.options |= NAND_ECC_CUSTOM_PAGE_ACCESS;
 	chip->ecc.read_page = denali_read_page;
 	chip->ecc.read_page_raw = denali_read_page_raw;
 	chip->ecc.write_page = denali_write_page;
@@ -1624,21 +1411,34 @@ int denali_init(struct denali_nand_info *denali)
 
 	ret = denali_multidev_fixup(denali);
 	if (ret)
-		goto failed_req_irq;
+		goto disable_irq;
+
+	/*
+	 * This buffer is DMA-mapped by denali_{read,write}_page_raw.  Do not
+	 * use devm_kmalloc() because the memory allocated by devm_ does not
+	 * guarantee DMA-safe alignment.
+	 */
+	denali->buf = kmalloc(mtd->writesize + mtd->oobsize, GFP_KERNEL);
+	if (!denali->buf) {
+		ret = -ENOMEM;
+		goto disable_irq;
+	}
 
 	ret = nand_scan_tail(mtd);
 	if (ret)
-		goto failed_req_irq;
+		goto free_buf;
 
 	ret = mtd_device_register(mtd, NULL, 0);
 	if (ret) {
 		dev_err(denali->dev, "Failed to register MTD: %d\n", ret);
-		goto failed_req_irq;
+		goto free_buf;
 	}
 	return 0;
 
-failed_req_irq:
-	denali_irq_cleanup(denali->irq, denali);
+free_buf:
+	kfree(denali->buf);
+disable_irq:
+	denali_disable_irq(denali);
 
 	return ret;
 }
@@ -1648,16 +1448,9 @@ EXPORT_SYMBOL(denali_init);
 void denali_remove(struct denali_nand_info *denali)
 {
 	struct mtd_info *mtd = nand_to_mtd(&denali->nand);
-	/*
-	 * Pre-compute DMA buffer size to avoid any problems in case
-	 * nand_release() ever changes in a way that mtd->writesize and
-	 * mtd->oobsize are not reliable after this call.
-	 */
-	int bufsize = mtd->writesize + mtd->oobsize;
 
 	nand_release(mtd);
-	denali_irq_cleanup(denali->irq, denali);
-	dma_unmap_single(denali->dev, denali->buf.dma_buf, bufsize,
-			 DMA_BIDIRECTIONAL);
+	kfree(denali->buf);
+	denali_disable_irq(denali);
 }
 EXPORT_SYMBOL(denali_remove);

diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h
index ec00485..237cc70 100644
--- a/drivers/mtd/nand/denali.h
+++ b/drivers/mtd/nand/denali.h

@@ -24,330 +24,315 @@
 #include <linux/mtd/nand.h>
 
 #define DEVICE_RESET				0x0
-#define     DEVICE_RESET__BANK0				0x0001
-#define     DEVICE_RESET__BANK1				0x0002
-#define     DEVICE_RESET__BANK2				0x0004
-#define     DEVICE_RESET__BANK3				0x0008
+#define     DEVICE_RESET__BANK(bank)			BIT(bank)
 
 #define TRANSFER_SPARE_REG			0x10
-#define     TRANSFER_SPARE_REG__FLAG			0x0001
+#define     TRANSFER_SPARE_REG__FLAG			BIT(0)
 
 #define LOAD_WAIT_CNT				0x20
-#define     LOAD_WAIT_CNT__VALUE			0xffff
+#define     LOAD_WAIT_CNT__VALUE			GENMASK(15, 0)
 
 #define PROGRAM_WAIT_CNT			0x30
-#define     PROGRAM_WAIT_CNT__VALUE			0xffff
+#define     PROGRAM_WAIT_CNT__VALUE			GENMASK(15, 0)
 
 #define ERASE_WAIT_CNT				0x40
-#define     ERASE_WAIT_CNT__VALUE			0xffff
+#define     ERASE_WAIT_CNT__VALUE			GENMASK(15, 0)
 
 #define INT_MON_CYCCNT				0x50
-#define     INT_MON_CYCCNT__VALUE			0xffff
+#define     INT_MON_CYCCNT__VALUE			GENMASK(15, 0)
 
 #define RB_PIN_ENABLED				0x60
-#define     RB_PIN_ENABLED__BANK0			0x0001
-#define     RB_PIN_ENABLED__BANK1			0x0002
-#define     RB_PIN_ENABLED__BANK2			0x0004
-#define     RB_PIN_ENABLED__BANK3			0x0008
+#define     RB_PIN_ENABLED__BANK(bank)			BIT(bank)
 
 #define MULTIPLANE_OPERATION			0x70
-#define     MULTIPLANE_OPERATION__FLAG			0x0001
+#define     MULTIPLANE_OPERATION__FLAG			BIT(0)
 
 #define MULTIPLANE_READ_ENABLE			0x80
-#define     MULTIPLANE_READ_ENABLE__FLAG		0x0001
+#define     MULTIPLANE_READ_ENABLE__FLAG		BIT(0)
 
 #define COPYBACK_DISABLE			0x90
-#define     COPYBACK_DISABLE__FLAG			0x0001
+#define     COPYBACK_DISABLE__FLAG			BIT(0)
 
 #define CACHE_WRITE_ENABLE			0xa0
-#define     CACHE_WRITE_ENABLE__FLAG			0x0001
+#define     CACHE_WRITE_ENABLE__FLAG			BIT(0)
 
 #define CACHE_READ_ENABLE			0xb0
-#define     CACHE_READ_ENABLE__FLAG			0x0001
+#define     CACHE_READ_ENABLE__FLAG			BIT(0)
 
 #define PREFETCH_MODE				0xc0
-#define     PREFETCH_MODE__PREFETCH_EN			0x0001
-#define     PREFETCH_MODE__PREFETCH_BURST_LENGTH	0xfff0
+#define     PREFETCH_MODE__PREFETCH_EN			BIT(0)
+#define     PREFETCH_MODE__PREFETCH_BURST_LENGTH	GENMASK(15, 4)
 
 #define CHIP_ENABLE_DONT_CARE			0xd0
-#define     CHIP_EN_DONT_CARE__FLAG			0x01
+#define     CHIP_EN_DONT_CARE__FLAG			BIT(0)
 
 #define ECC_ENABLE				0xe0
-#define     ECC_ENABLE__FLAG				0x0001
+#define     ECC_ENABLE__FLAG				BIT(0)
 
 #define GLOBAL_INT_ENABLE			0xf0
-#define     GLOBAL_INT_EN_FLAG				0x01
+#define     GLOBAL_INT_EN_FLAG				BIT(0)
 
-#define WE_2_RE					0x100
-#define     WE_2_RE__VALUE				0x003f
+#define TWHR2_AND_WE_2_RE			0x100
+#define     TWHR2_AND_WE_2_RE__WE_2_RE			GENMASK(5, 0)
+#define     TWHR2_AND_WE_2_RE__TWHR2			GENMASK(13, 8)
 
-#define ADDR_2_DATA				0x110
-#define     ADDR_2_DATA__VALUE				0x003f
+#define TCWAW_AND_ADDR_2_DATA			0x110
+/* The width of ADDR_2_DATA is 6 bit for old IP, 7 bit for new IP */
+#define     TCWAW_AND_ADDR_2_DATA__ADDR_2_DATA		GENMASK(6, 0)
+#define     TCWAW_AND_ADDR_2_DATA__TCWAW		GENMASK(13, 8)
 
 #define RE_2_WE					0x120
-#define     RE_2_WE__VALUE				0x003f
+#define     RE_2_WE__VALUE				GENMASK(5, 0)
 
 #define ACC_CLKS				0x130
-#define     ACC_CLKS__VALUE				0x000f
+#define     ACC_CLKS__VALUE				GENMASK(3, 0)
 
 #define NUMBER_OF_PLANES			0x140
-#define     NUMBER_OF_PLANES__VALUE			0x0007
+#define     NUMBER_OF_PLANES__VALUE			GENMASK(2, 0)
 
 #define PAGES_PER_BLOCK				0x150
-#define     PAGES_PER_BLOCK__VALUE			0xffff
+#define     PAGES_PER_BLOCK__VALUE			GENMASK(15, 0)
 
 #define DEVICE_WIDTH				0x160
-#define     DEVICE_WIDTH__VALUE				0x0003
+#define     DEVICE_WIDTH__VALUE				GENMASK(1, 0)
 
 #define DEVICE_MAIN_AREA_SIZE			0x170
-#define     DEVICE_MAIN_AREA_SIZE__VALUE		0xffff
+#define     DEVICE_MAIN_AREA_SIZE__VALUE		GENMASK(15, 0)
 
 #define DEVICE_SPARE_AREA_SIZE			0x180
-#define     DEVICE_SPARE_AREA_SIZE__VALUE		0xffff
+#define     DEVICE_SPARE_AREA_SIZE__VALUE		GENMASK(15, 0)
 
 #define TWO_ROW_ADDR_CYCLES			0x190
-#define     TWO_ROW_ADDR_CYCLES__FLAG			0x0001
+#define     TWO_ROW_ADDR_CYCLES__FLAG			BIT(0)
 
 #define MULTIPLANE_ADDR_RESTRICT		0x1a0
-#define     MULTIPLANE_ADDR_RESTRICT__FLAG		0x0001
+#define     MULTIPLANE_ADDR_RESTRICT__FLAG		BIT(0)
 
 #define ECC_CORRECTION				0x1b0
-#define     ECC_CORRECTION__VALUE			0x001f
+#define     ECC_CORRECTION__VALUE			GENMASK(4, 0)
+#define     ECC_CORRECTION__ERASE_THRESHOLD		GENMASK(31, 16)
+#define     MAKE_ECC_CORRECTION(val, thresh)		\
+			(((val) & (ECC_CORRECTION__VALUE)) | \
+			(((thresh) << 16) & (ECC_CORRECTION__ERASE_THRESHOLD)))
 
 #define READ_MODE				0x1c0
-#define     READ_MODE__VALUE				0x000f
+#define     READ_MODE__VALUE				GENMASK(3, 0)
 
 #define WRITE_MODE				0x1d0
-#define     WRITE_MODE__VALUE				0x000f
+#define     WRITE_MODE__VALUE				GENMASK(3, 0)
 
 #define COPYBACK_MODE				0x1e0
-#define     COPYBACK_MODE__VALUE			0x000f
+#define     COPYBACK_MODE__VALUE			GENMASK(3, 0)
 
 #define RDWR_EN_LO_CNT				0x1f0
-#define     RDWR_EN_LO_CNT__VALUE			0x001f
+#define     RDWR_EN_LO_CNT__VALUE			GENMASK(4, 0)
 
 #define RDWR_EN_HI_CNT				0x200
-#define     RDWR_EN_HI_CNT__VALUE			0x001f
+#define     RDWR_EN_HI_CNT__VALUE			GENMASK(4, 0)
 
 #define MAX_RD_DELAY				0x210
-#define     MAX_RD_DELAY__VALUE				0x000f
+#define     MAX_RD_DELAY__VALUE				GENMASK(3, 0)
 
 #define CS_SETUP_CNT				0x220
-#define     CS_SETUP_CNT__VALUE				0x001f
+#define     CS_SETUP_CNT__VALUE				GENMASK(4, 0)
+#define     CS_SETUP_CNT__TWB				GENMASK(17, 12)
 
 #define SPARE_AREA_SKIP_BYTES			0x230
-#define     SPARE_AREA_SKIP_BYTES__VALUE		0x003f
+#define     SPARE_AREA_SKIP_BYTES__VALUE		GENMASK(5, 0)
 
 #define SPARE_AREA_MARKER			0x240
-#define     SPARE_AREA_MARKER__VALUE			0xffff
+#define     SPARE_AREA_MARKER__VALUE			GENMASK(15, 0)
 
 #define DEVICES_CONNECTED			0x250
-#define     DEVICES_CONNECTED__VALUE			0x0007
+#define     DEVICES_CONNECTED__VALUE			GENMASK(2, 0)
 
 #define DIE_MASK				0x260
-#define     DIE_MASK__VALUE				0x00ff
+#define     DIE_MASK__VALUE				GENMASK(7, 0)
 
 #define FIRST_BLOCK_OF_NEXT_PLANE		0x270
-#define     FIRST_BLOCK_OF_NEXT_PLANE__VALUE		0xffff
+#define     FIRST_BLOCK_OF_NEXT_PLANE__VALUE		GENMASK(15, 0)
 
 #define WRITE_PROTECT				0x280
-#define     WRITE_PROTECT__FLAG				0x0001
+#define     WRITE_PROTECT__FLAG				BIT(0)
 
 #define RE_2_RE					0x290
-#define     RE_2_RE__VALUE				0x003f
+#define     RE_2_RE__VALUE				GENMASK(5, 0)
 
 #define MANUFACTURER_ID				0x300
-#define     MANUFACTURER_ID__VALUE			0x00ff
+#define     MANUFACTURER_ID__VALUE			GENMASK(7, 0)
 
 #define DEVICE_ID				0x310
-#define     DEVICE_ID__VALUE				0x00ff
+#define     DEVICE_ID__VALUE				GENMASK(7, 0)
 
 #define DEVICE_PARAM_0				0x320
-#define     DEVICE_PARAM_0__VALUE			0x00ff
+#define     DEVICE_PARAM_0__VALUE			GENMASK(7, 0)
 
 #define DEVICE_PARAM_1				0x330
-#define     DEVICE_PARAM_1__VALUE			0x00ff
+#define     DEVICE_PARAM_1__VALUE			GENMASK(7, 0)
 
 #define DEVICE_PARAM_2				0x340
-#define     DEVICE_PARAM_2__VALUE			0x00ff
+#define     DEVICE_PARAM_2__VALUE			GENMASK(7, 0)
 
 #define LOGICAL_PAGE_DATA_SIZE			0x350
-#define     LOGICAL_PAGE_DATA_SIZE__VALUE		0xffff
+#define     LOGICAL_PAGE_DATA_SIZE__VALUE		GENMASK(15, 0)
 
 #define LOGICAL_PAGE_SPARE_SIZE			0x360
-#define     LOGICAL_PAGE_SPARE_SIZE__VALUE		0xffff
+#define     LOGICAL_PAGE_SPARE_SIZE__VALUE		GENMASK(15, 0)
 
 #define REVISION				0x370
-#define     REVISION__VALUE				0xffff
+#define     REVISION__VALUE				GENMASK(15, 0)
 
 #define ONFI_DEVICE_FEATURES			0x380
-#define     ONFI_DEVICE_FEATURES__VALUE			0x003f
+#define     ONFI_DEVICE_FEATURES__VALUE			GENMASK(5, 0)
 
 #define ONFI_OPTIONAL_COMMANDS			0x390
-#define     ONFI_OPTIONAL_COMMANDS__VALUE		0x003f
+#define     ONFI_OPTIONAL_COMMANDS__VALUE		GENMASK(5, 0)
 
 #define ONFI_TIMING_MODE			0x3a0
-#define     ONFI_TIMING_MODE__VALUE			0x003f
+#define     ONFI_TIMING_MODE__VALUE			GENMASK(5, 0)
 
 #define ONFI_PGM_CACHE_TIMING_MODE		0x3b0
-#define     ONFI_PGM_CACHE_TIMING_MODE__VALUE		0x003f
+#define     ONFI_PGM_CACHE_TIMING_MODE__VALUE		GENMASK(5, 0)
 
 #define ONFI_DEVICE_NO_OF_LUNS			0x3c0
-#define     ONFI_DEVICE_NO_OF_LUNS__NO_OF_LUNS		0x00ff
-#define     ONFI_DEVICE_NO_OF_LUNS__ONFI_DEVICE		0x0100
+#define     ONFI_DEVICE_NO_OF_LUNS__NO_OF_LUNS		GENMASK(7, 0)
+#define     ONFI_DEVICE_NO_OF_LUNS__ONFI_DEVICE		BIT(8)
 
 #define ONFI_DEVICE_NO_OF_BLOCKS_PER_LUN_L	0x3d0
-#define     ONFI_DEVICE_NO_OF_BLOCKS_PER_LUN_L__VALUE	0xffff
+#define     ONFI_DEVICE_NO_OF_BLOCKS_PER_LUN_L__VALUE	GENMASK(15, 0)
 
 #define ONFI_DEVICE_NO_OF_BLOCKS_PER_LUN_U	0x3e0
-#define     ONFI_DEVICE_NO_OF_BLOCKS_PER_LUN_U__VALUE	0xffff
+#define     ONFI_DEVICE_NO_OF_BLOCKS_PER_LUN_U__VALUE	GENMASK(15, 0)
 
-#define FEATURES					0x3f0
-#define     FEATURES__N_BANKS				0x0003
-#define     FEATURES__ECC_MAX_ERR			0x003c
-#define     FEATURES__DMA				0x0040
-#define     FEATURES__CMD_DMA				0x0080
-#define     FEATURES__PARTITION				0x0100
-#define     FEATURES__XDMA_SIDEBAND			0x0200
-#define     FEATURES__GPREG				0x0400
-#define     FEATURES__INDEX_ADDR			0x0800
+#define FEATURES				0x3f0
+#define     FEATURES__N_BANKS				GENMASK(1, 0)
+#define     FEATURES__ECC_MAX_ERR			GENMASK(5, 2)
+#define     FEATURES__DMA				BIT(6)
+#define     FEATURES__CMD_DMA				BIT(7)
+#define     FEATURES__PARTITION				BIT(8)
+#define     FEATURES__XDMA_SIDEBAND			BIT(9)
+#define     FEATURES__GPREG				BIT(10)
+#define     FEATURES__INDEX_ADDR			BIT(11)
 
 #define TRANSFER_MODE				0x400
-#define     TRANSFER_MODE__VALUE			0x0003
+#define     TRANSFER_MODE__VALUE			GENMASK(1, 0)
 
-#define INTR_STATUS(__bank)	(0x410 + ((__bank) * 0x50))
-#define INTR_EN(__bank)		(0x420 + ((__bank) * 0x50))
+#define INTR_STATUS(bank)			(0x410 + (bank) * 0x50)
+#define INTR_EN(bank)				(0x420 + (bank) * 0x50)
 /* bit[1:0] is used differently depending on IP version */
-#define     INTR__ECC_UNCOR_ERR				0x0001	/* new IP */
-#define     INTR__ECC_TRANSACTION_DONE			0x0001	/* old IP */
-#define     INTR__ECC_ERR				0x0002	/* old IP */
-#define     INTR__DMA_CMD_COMP				0x0004
-#define     INTR__TIME_OUT				0x0008
-#define     INTR__PROGRAM_FAIL				0x0010
-#define     INTR__ERASE_FAIL				0x0020
-#define     INTR__LOAD_COMP				0x0040
-#define     INTR__PROGRAM_COMP				0x0080
-#define     INTR__ERASE_COMP				0x0100
-#define     INTR__PIPE_CPYBCK_CMD_COMP			0x0200
-#define     INTR__LOCKED_BLK				0x0400
-#define     INTR__UNSUP_CMD				0x0800
-#define     INTR__INT_ACT				0x1000
-#define     INTR__RST_COMP				0x2000
-#define     INTR__PIPE_CMD_ERR				0x4000
-#define     INTR__PAGE_XFER_INC				0x8000
+#define     INTR__ECC_UNCOR_ERR				BIT(0)	/* new IP */
+#define     INTR__ECC_TRANSACTION_DONE			BIT(0)	/* old IP */
+#define     INTR__ECC_ERR				BIT(1)	/* old IP */
+#define     INTR__DMA_CMD_COMP				BIT(2)
+#define     INTR__TIME_OUT				BIT(3)
+#define     INTR__PROGRAM_FAIL				BIT(4)
+#define     INTR__ERASE_FAIL				BIT(5)
+#define     INTR__LOAD_COMP				BIT(6)
+#define     INTR__PROGRAM_COMP				BIT(7)
+#define     INTR__ERASE_COMP				BIT(8)
+#define     INTR__PIPE_CPYBCK_CMD_COMP			BIT(9)
+#define     INTR__LOCKED_BLK				BIT(10)
+#define     INTR__UNSUP_CMD				BIT(11)
+#define     INTR__INT_ACT				BIT(12)
+#define     INTR__RST_COMP				BIT(13)
+#define     INTR__PIPE_CMD_ERR				BIT(14)
+#define     INTR__PAGE_XFER_INC				BIT(15)
+#define     INTR__ERASED_PAGE				BIT(16)
 
-#define PAGE_CNT(__bank)	(0x430 + ((__bank) * 0x50))
-#define ERR_PAGE_ADDR(__bank)	(0x440 + ((__bank) * 0x50))
-#define ERR_BLOCK_ADDR(__bank)	(0x450 + ((__bank) * 0x50))
+#define PAGE_CNT(bank)				(0x430 + (bank) * 0x50)
+#define ERR_PAGE_ADDR(bank)			(0x440 + (bank) * 0x50)
+#define ERR_BLOCK_ADDR(bank)			(0x450 + (bank) * 0x50)
 
 #define ECC_THRESHOLD				0x600
-#define     ECC_THRESHOLD__VALUE			0x03ff
+#define     ECC_THRESHOLD__VALUE			GENMASK(9, 0)
 
 #define ECC_ERROR_BLOCK_ADDRESS			0x610
-#define     ECC_ERROR_BLOCK_ADDRESS__VALUE		0xffff
+#define     ECC_ERROR_BLOCK_ADDRESS__VALUE		GENMASK(15, 0)
 
 #define ECC_ERROR_PAGE_ADDRESS			0x620
-#define     ECC_ERROR_PAGE_ADDRESS__VALUE		0x0fff
-#define     ECC_ERROR_PAGE_ADDRESS__BANK		0xf000
+#define     ECC_ERROR_PAGE_ADDRESS__VALUE		GENMASK(11, 0)
+#define     ECC_ERROR_PAGE_ADDRESS__BANK		GENMASK(15, 12)
 
 #define ECC_ERROR_ADDRESS			0x630
-#define     ECC_ERROR_ADDRESS__OFFSET			0x0fff
-#define     ECC_ERROR_ADDRESS__SECTOR_NR		0xf000
+#define     ECC_ERROR_ADDRESS__OFFSET			GENMASK(11, 0)
+#define     ECC_ERROR_ADDRESS__SECTOR_NR		GENMASK(15, 12)
 
 #define ERR_CORRECTION_INFO			0x640
-#define     ERR_CORRECTION_INFO__BYTEMASK		0x00ff
-#define     ERR_CORRECTION_INFO__DEVICE_NR		0x0f00
-#define     ERR_CORRECTION_INFO__ERROR_TYPE		0x4000
-#define     ERR_CORRECTION_INFO__LAST_ERR_INFO		0x8000
+#define     ERR_CORRECTION_INFO__BYTEMASK		GENMASK(7, 0)
+#define     ERR_CORRECTION_INFO__DEVICE_NR		GENMASK(11, 8)
+#define     ERR_CORRECTION_INFO__ERROR_TYPE		BIT(14)
+#define     ERR_CORRECTION_INFO__LAST_ERR_INFO		BIT(15)
 
 #define ECC_COR_INFO(bank)			(0x650 + (bank) / 2 * 0x10)
 #define     ECC_COR_INFO__SHIFT(bank)			((bank) % 2 * 8)
-#define     ECC_COR_INFO__MAX_ERRORS			0x007f
-#define     ECC_COR_INFO__UNCOR_ERR			0x0080
+#define     ECC_COR_INFO__MAX_ERRORS			GENMASK(6, 0)
+#define     ECC_COR_INFO__UNCOR_ERR			BIT(7)
+
+#define CFG_DATA_BLOCK_SIZE			0x6b0
+
+#define CFG_LAST_DATA_BLOCK_SIZE		0x6c0
+
+#define CFG_NUM_DATA_BLOCKS			0x6d0
+
+#define CFG_META_DATA_SIZE			0x6e0
 
 #define DMA_ENABLE				0x700
-#define     DMA_ENABLE__FLAG				0x0001
+#define     DMA_ENABLE__FLAG				BIT(0)
 
 #define IGNORE_ECC_DONE				0x710
-#define     IGNORE_ECC_DONE__FLAG			0x0001
+#define     IGNORE_ECC_DONE__FLAG			BIT(0)
 
 #define DMA_INTR				0x720
 #define DMA_INTR_EN				0x730
-#define     DMA_INTR__TARGET_ERROR			0x0001
-#define     DMA_INTR__DESC_COMP_CHANNEL0		0x0002
-#define     DMA_INTR__DESC_COMP_CHANNEL1		0x0004
-#define     DMA_INTR__DESC_COMP_CHANNEL2		0x0008
-#define     DMA_INTR__DESC_COMP_CHANNEL3		0x0010
-#define     DMA_INTR__MEMCOPY_DESC_COMP			0x0020
+#define     DMA_INTR__TARGET_ERROR			BIT(0)
+#define     DMA_INTR__DESC_COMP_CHANNEL0		BIT(1)
+#define     DMA_INTR__DESC_COMP_CHANNEL1		BIT(2)
+#define     DMA_INTR__DESC_COMP_CHANNEL2		BIT(3)
+#define     DMA_INTR__DESC_COMP_CHANNEL3		BIT(4)
+#define     DMA_INTR__MEMCOPY_DESC_COMP			BIT(5)
 
 #define TARGET_ERR_ADDR_LO			0x740
-#define     TARGET_ERR_ADDR_LO__VALUE			0xffff
+#define     TARGET_ERR_ADDR_LO__VALUE			GENMASK(15, 0)
 
 #define TARGET_ERR_ADDR_HI			0x750
-#define     TARGET_ERR_ADDR_HI__VALUE			0xffff
+#define     TARGET_ERR_ADDR_HI__VALUE			GENMASK(15, 0)
 
 #define CHNL_ACTIVE				0x760
-#define     CHNL_ACTIVE__CHANNEL0			0x0001
-#define     CHNL_ACTIVE__CHANNEL1			0x0002
-#define     CHNL_ACTIVE__CHANNEL2			0x0004
-#define     CHNL_ACTIVE__CHANNEL3			0x0008
-
-#define FAIL 1                  /*failed flag*/
-#define PASS 0                  /*success flag*/
-
-#define CLK_X  5
-#define CLK_MULTI 4
-
-#define ONFI_BLOOM_TIME         1
-#define MODE5_WORKAROUND        0
-
-
-#define MODE_00    0x00000000
-#define MODE_01    0x04000000
-#define MODE_10    0x08000000
-#define MODE_11    0x0C000000
-
-#define ECC_SECTOR_SIZE     512
-
-struct nand_buf {
-	int head;
-	int tail;
-	uint8_t *buf;
-	dma_addr_t dma_buf;
-};
-
-#define INTEL_CE4100	1
-#define INTEL_MRST	2
-#define DT		3
+#define     CHNL_ACTIVE__CHANNEL0			BIT(0)
+#define     CHNL_ACTIVE__CHANNEL1			BIT(1)
+#define     CHNL_ACTIVE__CHANNEL2			BIT(2)
+#define     CHNL_ACTIVE__CHANNEL3			BIT(3)
 
 struct denali_nand_info {
 	struct nand_chip nand;
-	int flash_bank; /* currently selected chip */
-	int status;
-	int platform;
-	struct nand_buf buf;
+	unsigned long clk_x_rate;	/* bus interface clock rate */
+	int active_bank;		/* currently selected bank */
 	struct device *dev;
-	int total_used_banks;
-	int page;
-	void __iomem *flash_reg;	/* Register Interface */
-	void __iomem *flash_mem;	/* Host Data/Command Interface */
+	void __iomem *reg;		/* Register Interface */
+	void __iomem *host;		/* Host Data/Command Interface */
 
 	/* elements used by ISR */
 	struct completion complete;
 	spinlock_t irq_lock;
+	uint32_t irq_mask;
 	uint32_t irq_status;
 	int irq;
 
-	int devnum;	/* represent how many nands connected */
-	int bbtskipbytes;
+	void *buf;
+	dma_addr_t dma_addr;
+	int dma_avail;
+	int devs_per_cs;		/* devices connected in parallel */
+	int oob_skip_bytes;
 	int max_banks;
 	unsigned int revision;
 	unsigned int caps;
+	const struct nand_ecc_caps *ecc_caps;
 };
 
 #define DENALI_CAP_HW_ECC_FIXUP			BIT(0)
 #define DENALI_CAP_DMA_64BIT			BIT(1)
 
+int denali_calc_ecc_bytes(int step_size, int strength);
 extern int denali_init(struct denali_nand_info *denali);
 extern void denali_remove(struct denali_nand_info *denali);
 

diff --git a/drivers/mtd/nand/denali_dt.c b/drivers/mtd/nand/denali_dt.c
index df9ef36..47f398e 100644
--- a/drivers/mtd/nand/denali_dt.c
+++ b/drivers/mtd/nand/denali_dt.c

@@ -32,10 +32,31 @@ struct denali_dt {
 struct denali_dt_data {
 	unsigned int revision;
 	unsigned int caps;
+	const struct nand_ecc_caps *ecc_caps;
 };
 
+NAND_ECC_CAPS_SINGLE(denali_socfpga_ecc_caps, denali_calc_ecc_bytes,
+		     512, 8, 15);
 static const struct denali_dt_data denali_socfpga_data = {
 	.caps = DENALI_CAP_HW_ECC_FIXUP,
+	.ecc_caps = &denali_socfpga_ecc_caps,
+};
+
+NAND_ECC_CAPS_SINGLE(denali_uniphier_v5a_ecc_caps, denali_calc_ecc_bytes,
+		     1024, 8, 16, 24);
+static const struct denali_dt_data denali_uniphier_v5a_data = {
+	.caps = DENALI_CAP_HW_ECC_FIXUP |
+		DENALI_CAP_DMA_64BIT,
+	.ecc_caps = &denali_uniphier_v5a_ecc_caps,
+};
+
+NAND_ECC_CAPS_SINGLE(denali_uniphier_v5b_ecc_caps, denali_calc_ecc_bytes,
+		     1024, 8, 16);
+static const struct denali_dt_data denali_uniphier_v5b_data = {
+	.revision = 0x0501,
+	.caps = DENALI_CAP_HW_ECC_FIXUP |
+		DENALI_CAP_DMA_64BIT,
+	.ecc_caps = &denali_uniphier_v5b_ecc_caps,
 };
 
 static const struct of_device_id denali_nand_dt_ids[] = {
@@ -43,13 +64,21 @@ static const struct of_device_id denali_nand_dt_ids[] = {
 		.compatible = "altr,socfpga-denali-nand",
 		.data = &denali_socfpga_data,
 	},
+	{
+		.compatible = "socionext,uniphier-denali-nand-v5a",
+		.data = &denali_uniphier_v5a_data,
+	},
+	{
+		.compatible = "socionext,uniphier-denali-nand-v5b",
+		.data = &denali_uniphier_v5b_data,
+	},
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, denali_nand_dt_ids);
 
 static int denali_dt_probe(struct platform_device *pdev)
 {
-	struct resource *denali_reg, *nand_data;
+	struct resource *res;
 	struct denali_dt *dt;
 	const struct denali_dt_data *data;
 	struct denali_nand_info *denali;
@@ -64,9 +93,9 @@ static int denali_dt_probe(struct platform_device *pdev)
 	if (data) {
 		denali->revision = data->revision;
 		denali->caps = data->caps;
+		denali->ecc_caps = data->ecc_caps;
 	}
 
-	denali->platform = DT;
 	denali->dev = &pdev->dev;
 	denali->irq = platform_get_irq(pdev, 0);
 	if (denali->irq < 0) {
@@ -74,17 +103,15 @@ static int denali_dt_probe(struct platform_device *pdev)
 		return denali->irq;
 	}
 
-	denali_reg = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-						  "denali_reg");
-	denali->flash_reg = devm_ioremap_resource(&pdev->dev, denali_reg);
-	if (IS_ERR(denali->flash_reg))
-		return PTR_ERR(denali->flash_reg);
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "denali_reg");
+	denali->reg = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(denali->reg))
+		return PTR_ERR(denali->reg);
 
-	nand_data = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-						 "nand_data");
-	denali->flash_mem = devm_ioremap_resource(&pdev->dev, nand_data);
-	if (IS_ERR(denali->flash_mem))
-		return PTR_ERR(denali->flash_mem);
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_data");
+	denali->host = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(denali->host))
+		return PTR_ERR(denali->host);
 
 	dt->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(dt->clk)) {
@@ -93,6 +120,8 @@ static int denali_dt_probe(struct platform_device *pdev)
 	}
 	clk_prepare_enable(dt->clk);
 
+	denali->clk_x_rate = clk_get_rate(dt->clk);
+
 	ret = denali_init(denali);
 	if (ret)
 		goto out_disable_clk;

diff --git a/drivers/mtd/nand/denali_pci.c b/drivers/mtd/nand/denali_pci.c
index ac84323..81370c7 100644
--- a/drivers/mtd/nand/denali_pci.c
+++ b/drivers/mtd/nand/denali_pci.c

@@ -19,6 +19,9 @@
 
 #define DENALI_NAND_NAME    "denali-nand-pci"
 
+#define INTEL_CE4100	1
+#define INTEL_MRST	2
+
 /* List of platforms this NAND controller has be integrated into */
 static const struct pci_device_id denali_pci_ids[] = {
 	{ PCI_VDEVICE(INTEL, 0x0701), INTEL_CE4100 },
@@ -27,6 +30,8 @@ static const struct pci_device_id denali_pci_ids[] = {
 };
 MODULE_DEVICE_TABLE(pci, denali_pci_ids);
 
+NAND_ECC_CAPS_SINGLE(denali_pci_ecc_caps, denali_calc_ecc_bytes, 512, 8, 15);
+
 static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	int ret;
@@ -45,13 +50,11 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	}
 
 	if (id->driver_data == INTEL_CE4100) {
-		denali->platform = INTEL_CE4100;
 		mem_base = pci_resource_start(dev, 0);
 		mem_len = pci_resource_len(dev, 1);
 		csr_base = pci_resource_start(dev, 1);
 		csr_len = pci_resource_len(dev, 1);
 	} else {
-		denali->platform = INTEL_MRST;
 		csr_base = pci_resource_start(dev, 0);
 		csr_len = pci_resource_len(dev, 0);
 		mem_base = pci_resource_start(dev, 1);
@@ -65,6 +68,9 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	pci_set_master(dev);
 	denali->dev = &dev->dev;
 	denali->irq = dev->irq;
+	denali->ecc_caps = &denali_pci_ecc_caps;
+	denali->nand.ecc.options |= NAND_ECC_MAXIMIZE;
+	denali->clk_x_rate = 200000000;		/* 200 MHz */
 
 	ret = pci_request_regions(dev, DENALI_NAND_NAME);
 	if (ret) {
@@ -72,14 +78,14 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		return ret;
 	}
 
-	denali->flash_reg = ioremap_nocache(csr_base, csr_len);
-	if (!denali->flash_reg) {
+	denali->reg = ioremap_nocache(csr_base, csr_len);
+	if (!denali->reg) {
 		dev_err(&dev->dev, "Spectra: Unable to remap memory region\n");
 		return -ENOMEM;
 	}
 
-	denali->flash_mem = ioremap_nocache(mem_base, mem_len);
-	if (!denali->flash_mem) {
+	denali->host = ioremap_nocache(mem_base, mem_len);
+	if (!denali->host) {
 		dev_err(&dev->dev, "Spectra: ioremap_nocache failed!");
 		ret = -ENOMEM;
 		goto failed_remap_reg;
@@ -94,9 +100,9 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	return 0;
 
 failed_remap_mem:
-	iounmap(denali->flash_mem);
+	iounmap(denali->host);
 failed_remap_reg:
-	iounmap(denali->flash_reg);
+	iounmap(denali->reg);
 	return ret;
 }
 
@@ -106,8 +112,8 @@ static void denali_pci_remove(struct pci_dev *dev)
 	struct denali_nand_info *denali = pci_get_drvdata(dev);
 
 	denali_remove(denali);
-	iounmap(denali->flash_reg);
-	iounmap(denali->flash_mem);
+	iounmap(denali->reg);
+	iounmap(denali->host);
 }
 
 static struct pci_driver denali_pci_driver = {

diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c
index 7af2a3c..a27a84f 100644
--- a/drivers/mtd/nand/docg4.c
+++ b/drivers/mtd/nand/docg4.c

@@ -1260,6 +1260,8 @@ static void __init init_mtd_structs(struct mtd_info *mtd)
 	nand->read_buf = docg4_read_buf;
 	nand->write_buf = docg4_write_buf16;
 	nand->erase = docg4_erase_block;
+	nand->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	nand->onfi_get_features = nand_onfi_get_set_features_notsupp;
 	nand->ecc.read_page = docg4_read_page;
 	nand->ecc.write_page = docg4_write_page;
 	nand->ecc.read_page_raw = docg4_read_page_raw;

diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index 113f76e..b9ac16f 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c

@@ -775,6 +775,8 @@ static int fsl_elbc_chip_init(struct fsl_elbc_mtd *priv)
 	chip->select_chip = fsl_elbc_select_chip;
 	chip->cmdfunc = fsl_elbc_cmdfunc;
 	chip->waitfunc = fsl_elbc_wait;
+	chip->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	chip->bbt_td = &bbt_main_descr;
 	chip->bbt_md = &bbt_mirror_descr;

diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
index d1570f5..59408ec 100644
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c

@@ -171,34 +171,6 @@ static void set_addr(struct mtd_info *mtd, int column, int page_addr, int oob)
 		ifc_nand_ctrl->index += mtd->writesize;
 }
 
-static int is_blank(struct mtd_info *mtd, unsigned int bufnum)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	struct fsl_ifc_mtd *priv = nand_get_controller_data(chip);
-	u8 __iomem *addr = priv->vbase + bufnum * (mtd->writesize * 2);
-	u32 __iomem *mainarea = (u32 __iomem *)addr;
-	u8 __iomem *oob = addr + mtd->writesize;
-	struct mtd_oob_region oobregion = { };
-	int i, section = 0;
-
-	for (i = 0; i < mtd->writesize / 4; i++) {
-		if (__raw_readl(&mainarea[i]) != 0xffffffff)
-			return 0;
-	}
-
-	mtd_ooblayout_ecc(mtd, section++, &oobregion);
-	while (oobregion.length) {
-		for (i = 0; i < oobregion.length; i++) {
-			if (__raw_readb(&oob[oobregion.offset + i]) != 0xff)
-				return 0;
-		}
-
-		mtd_ooblayout_ecc(mtd, section++, &oobregion);
-	}
-
-	return 1;
-}
-
 /* returns nonzero if entire page is blank */
 static int check_read_ecc(struct mtd_info *mtd, struct fsl_ifc_ctrl *ctrl,
 			  u32 *eccstat, unsigned int bufnum)
@@ -274,16 +246,14 @@ static void fsl_ifc_run_command(struct mtd_info *mtd)
 			if (errors == 15) {
 				/*
 				 * Uncorrectable error.
-				 * OK only if the whole page is blank.
+				 * We'll check for blank pages later.
 				 *
 				 * We disable ECCER reporting due to...
 				 * erratum IFC-A002770 -- so report it now if we
 				 * see an uncorrectable error in ECCSTAT.
 				 */
-				if (!is_blank(mtd, bufnum))
-					ctrl->nand_stat |=
-						IFC_NAND_EVTER_STAT_ECCER;
-				break;
+				ctrl->nand_stat |= IFC_NAND_EVTER_STAT_ECCER;
+				continue;
 			}
 
 			mtd->ecc_stats.corrected += errors;
@@ -678,6 +648,39 @@ static int fsl_ifc_wait(struct mtd_info *mtd, struct nand_chip *chip)
 	return nand_fsr | NAND_STATUS_WP;
 }
 
+/*
+ * The controller does not check for bitflips in erased pages,
+ * therefore software must check instead.
+ */
+static int check_erased_page(struct nand_chip *chip, u8 *buf)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	u8 *ecc = chip->oob_poi;
+	const int ecc_size = chip->ecc.bytes;
+	const int pkt_size = chip->ecc.size;
+	int i, res, bitflips = 0;
+	struct mtd_oob_region oobregion = { };
+
+	mtd_ooblayout_ecc(mtd, 0, &oobregion);
+	ecc += oobregion.offset;
+
+	for (i = 0; i < chip->ecc.steps; ++i) {
+		res = nand_check_erased_ecc_chunk(buf, pkt_size, ecc, ecc_size,
+						  NULL, 0,
+						  chip->ecc.strength);
+		if (res < 0)
+			mtd->ecc_stats.failed++;
+		else
+			mtd->ecc_stats.corrected += res;
+
+		bitflips = max(res, bitflips);
+		buf += pkt_size;
+		ecc += ecc_size;
+	}
+
+	return bitflips;
+}
+
 static int fsl_ifc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 			     uint8_t *buf, int oob_required, int page)
 {
@@ -689,8 +692,12 @@ static int fsl_ifc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	if (oob_required)
 		fsl_ifc_read_buf(mtd, chip->oob_poi, mtd->oobsize);
 
-	if (ctrl->nand_stat & IFC_NAND_EVTER_STAT_ECCER)
-		dev_err(priv->dev, "NAND Flash ECC Uncorrectable Error\n");
+	if (ctrl->nand_stat & IFC_NAND_EVTER_STAT_ECCER) {
+		if (!oob_required)
+			fsl_ifc_read_buf(mtd, chip->oob_poi, mtd->oobsize);
+
+		return check_erased_page(chip, buf);
+	}
 
 	if (ctrl->nand_stat != IFC_NAND_EVTER_STAT_OPC)
 		mtd->ecc_stats.failed++;
@@ -831,6 +838,8 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 	chip->select_chip = fsl_ifc_select_chip;
 	chip->cmdfunc = fsl_ifc_cmdfunc;
 	chip->waitfunc = fsl_ifc_wait;
+	chip->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	chip->bbt_td = &bbt_main_descr;
 	chip->bbt_md = &bbt_mirror_descr;
@@ -904,7 +913,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 		chip->ecc.algo = NAND_ECC_HAMMING;
 	}
 
-	if (ctrl->version == FSL_IFC_VERSION_1_1_0)
+	if (ctrl->version >= FSL_IFC_VERSION_1_1_0)
 		fsl_ifc_sram_init(priv);
 
 	return 0;

diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index cea50d2..9d8b051 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c

@@ -302,25 +302,13 @@ static void fsmc_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
  * This routine initializes timing parameters related to NAND memory access in
  * FSMC registers
  */
-static void fsmc_nand_setup(void __iomem *regs, uint32_t bank,
-			   uint32_t busw, struct fsmc_nand_timings *timings)
+static void fsmc_nand_setup(struct fsmc_nand_data *host,
+			    struct fsmc_nand_timings *tims)
 {
 	uint32_t value = FSMC_DEVTYPE_NAND | FSMC_ENABLE | FSMC_WAITON;
 	uint32_t tclr, tar, thiz, thold, twait, tset;
-	struct fsmc_nand_timings *tims;
-	struct fsmc_nand_timings default_timings = {
-		.tclr	= FSMC_TCLR_1,
-		.tar	= FSMC_TAR_1,
-		.thiz	= FSMC_THIZ_1,
-		.thold	= FSMC_THOLD_4,
-		.twait	= FSMC_TWAIT_6,
-		.tset	= FSMC_TSET_0,
-	};
-
-	if (timings)
-		tims = timings;
-	else
-		tims = &default_timings;
+	unsigned int bank = host->bank;
+	void __iomem *regs = host->regs_va;
 
 	tclr = (tims->tclr & FSMC_TCLR_MASK) << FSMC_TCLR_SHIFT;
 	tar = (tims->tar & FSMC_TAR_MASK) << FSMC_TAR_SHIFT;
@@ -329,7 +317,7 @@ static void fsmc_nand_setup(void __iomem *regs, uint32_t bank,
 	twait = (tims->twait & FSMC_TWAIT_MASK) << FSMC_TWAIT_SHIFT;
 	tset = (tims->tset & FSMC_TSET_MASK) << FSMC_TSET_SHIFT;
 
-	if (busw)
+	if (host->nand.options & NAND_BUSWIDTH_16)
 		writel_relaxed(value | FSMC_DEVWID_16,
 				FSMC_NAND_REG(regs, bank, PC));
 	else
@@ -344,6 +332,87 @@ static void fsmc_nand_setup(void __iomem *regs, uint32_t bank,
 			FSMC_NAND_REG(regs, bank, ATTRIB));
 }
 
+static int fsmc_calc_timings(struct fsmc_nand_data *host,
+			     const struct nand_sdr_timings *sdrt,
+			     struct fsmc_nand_timings *tims)
+{
+	unsigned long hclk = clk_get_rate(host->clk);
+	unsigned long hclkn = NSEC_PER_SEC / hclk;
+	uint32_t thiz, thold, twait, tset;
+
+	if (sdrt->tRC_min < 30000)
+		return -EOPNOTSUPP;
+
+	tims->tar = DIV_ROUND_UP(sdrt->tAR_min / 1000, hclkn) - 1;
+	if (tims->tar > FSMC_TAR_MASK)
+		tims->tar = FSMC_TAR_MASK;
+	tims->tclr = DIV_ROUND_UP(sdrt->tCLR_min / 1000, hclkn) - 1;
+	if (tims->tclr > FSMC_TCLR_MASK)
+		tims->tclr = FSMC_TCLR_MASK;
+
+	thiz = sdrt->tCS_min - sdrt->tWP_min;
+	tims->thiz = DIV_ROUND_UP(thiz / 1000, hclkn);
+
+	thold = sdrt->tDH_min;
+	if (thold < sdrt->tCH_min)
+		thold = sdrt->tCH_min;
+	if (thold < sdrt->tCLH_min)
+		thold = sdrt->tCLH_min;
+	if (thold < sdrt->tWH_min)
+		thold = sdrt->tWH_min;
+	if (thold < sdrt->tALH_min)
+		thold = sdrt->tALH_min;
+	if (thold < sdrt->tREH_min)
+		thold = sdrt->tREH_min;
+	tims->thold = DIV_ROUND_UP(thold / 1000, hclkn);
+	if (tims->thold == 0)
+		tims->thold = 1;
+	else if (tims->thold > FSMC_THOLD_MASK)
+		tims->thold = FSMC_THOLD_MASK;
+
+	twait = max(sdrt->tRP_min, sdrt->tWP_min);
+	tims->twait = DIV_ROUND_UP(twait / 1000, hclkn) - 1;
+	if (tims->twait == 0)
+		tims->twait = 1;
+	else if (tims->twait > FSMC_TWAIT_MASK)
+		tims->twait = FSMC_TWAIT_MASK;
+
+	tset = max(sdrt->tCS_min - sdrt->tWP_min,
+		   sdrt->tCEA_max - sdrt->tREA_max);
+	tims->tset = DIV_ROUND_UP(tset / 1000, hclkn) - 1;
+	if (tims->tset == 0)
+		tims->tset = 1;
+	else if (tims->tset > FSMC_TSET_MASK)
+		tims->tset = FSMC_TSET_MASK;
+
+	return 0;
+}
+
+static int fsmc_setup_data_interface(struct mtd_info *mtd, int csline,
+				     const struct nand_data_interface *conf)
+{
+	struct nand_chip *nand = mtd_to_nand(mtd);
+	struct fsmc_nand_data *host = nand_get_controller_data(nand);
+	struct fsmc_nand_timings tims;
+	const struct nand_sdr_timings *sdrt;
+	int ret;
+
+	sdrt = nand_get_sdr_timings(conf);
+	if (IS_ERR(sdrt))
+		return PTR_ERR(sdrt);
+
+	ret = fsmc_calc_timings(host, sdrt, &tims);
+	if (ret)
+		return ret;
+
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
+		return 0;
+
+	fsmc_nand_setup(host, &tims);
+
+	return 0;
+}
+
 /*
  * fsmc_enable_hwecc - Enables Hardware ECC through FSMC registers
  */
@@ -796,10 +865,8 @@ static int fsmc_nand_probe_config_dt(struct platform_device *pdev,
 		return -ENOMEM;
 	ret = of_property_read_u8_array(np, "timings", (u8 *)host->dev_timings,
 						sizeof(*host->dev_timings));
-	if (ret) {
-		dev_info(&pdev->dev, "No timings in dts specified, using default timings!\n");
+	if (ret)
 		host->dev_timings = NULL;
-	}
 
 	/* Set default NAND bank to 0 */
 	host->bank = 0;
@@ -933,9 +1000,10 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 		break;
 	}
 
-	fsmc_nand_setup(host->regs_va, host->bank,
-			nand->options & NAND_BUSWIDTH_16,
-			host->dev_timings);
+	if (host->dev_timings)
+		fsmc_nand_setup(host, host->dev_timings);
+	else
+		nand->setup_data_interface = fsmc_setup_data_interface;
 
 	if (AMBA_REV_BITS(host->pid) >= 8) {
 		nand->ecc.read_page = fsmc_read_page_hwecc;
@@ -986,6 +1054,9 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 				break;
 			}
 
+		case NAND_ECC_ON_DIE:
+			break;
+
 		default:
 			dev_err(&pdev->dev, "Unsupported ECC mode!\n");
 			goto err_probe;
@@ -1073,9 +1144,8 @@ static int fsmc_nand_resume(struct device *dev)
 	struct fsmc_nand_data *host = dev_get_drvdata(dev);
 	if (host) {
 		clk_prepare_enable(host->clk);
-		fsmc_nand_setup(host->regs_va, host->bank,
-				host->nand.options & NAND_BUSWIDTH_16,
-				host->dev_timings);
+		if (host->dev_timings)
+			fsmc_nand_setup(host, host->dev_timings);
 	}
 	return 0;
 }

diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
index 141bd70..9778724 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c

@@ -26,7 +26,7 @@
 #include "gpmi-regs.h"
 #include "bch-regs.h"
 
-static struct timing_threshod timing_default_threshold = {
+static struct timing_threshold timing_default_threshold = {
 	.max_data_setup_cycles       = (BM_GPMI_TIMING0_DATA_SETUP >>
 						BP_GPMI_TIMING0_DATA_SETUP),
 	.internal_data_setup_in_ns   = 0,
@@ -329,7 +329,7 @@ static unsigned int ns_to_cycles(unsigned int time,
 static int gpmi_nfc_compute_hardware_timing(struct gpmi_nand_data *this,
 					struct gpmi_nfc_hardware_timing *hw)
 {
-	struct timing_threshod *nfc = &timing_default_threshold;
+	struct timing_threshold *nfc = &timing_default_threshold;
 	struct resources *r = &this->resources;
 	struct nand_chip *nand = &this->nand;
 	struct nand_timing target = this->timing;
@@ -932,7 +932,7 @@ static int enable_edo_mode(struct gpmi_nand_data *this, int mode)
 
 	nand->select_chip(mtd, 0);
 
-	/* [1] send SET FEATURE commond to NAND */
+	/* [1] send SET FEATURE command to NAND */
 	feature[0] = mode;
 	ret = nand->onfi_set_features(mtd, nand,
 				ONFI_FEATURE_ADDR_TIMING_MODE, feature);

diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index d521396..50f8d4a 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c

@@ -82,6 +82,10 @@ static int gpmi_ooblayout_free(struct mtd_info *mtd, int section,
 	return 0;
 }
 
+static const char * const gpmi_clks_for_mx2x[] = {
+	"gpmi_io",
+};
+
 static const struct mtd_ooblayout_ops gpmi_ooblayout_ops = {
 	.ecc = gpmi_ooblayout_ecc,
 	.free = gpmi_ooblayout_free,
@@ -91,24 +95,48 @@ static const struct gpmi_devdata gpmi_devdata_imx23 = {
 	.type = IS_MX23,
 	.bch_max_ecc_strength = 20,
 	.max_chain_delay = 16,
+	.clks = gpmi_clks_for_mx2x,
+	.clks_count = ARRAY_SIZE(gpmi_clks_for_mx2x),
 };
 
 static const struct gpmi_devdata gpmi_devdata_imx28 = {
 	.type = IS_MX28,
 	.bch_max_ecc_strength = 20,
 	.max_chain_delay = 16,
+	.clks = gpmi_clks_for_mx2x,
+	.clks_count = ARRAY_SIZE(gpmi_clks_for_mx2x),
+};
+
+static const char * const gpmi_clks_for_mx6[] = {
+	"gpmi_io", "gpmi_apb", "gpmi_bch", "gpmi_bch_apb", "per1_bch",
 };
 
 static const struct gpmi_devdata gpmi_devdata_imx6q = {
 	.type = IS_MX6Q,
 	.bch_max_ecc_strength = 40,
 	.max_chain_delay = 12,
+	.clks = gpmi_clks_for_mx6,
+	.clks_count = ARRAY_SIZE(gpmi_clks_for_mx6),
 };
 
 static const struct gpmi_devdata gpmi_devdata_imx6sx = {
 	.type = IS_MX6SX,
 	.bch_max_ecc_strength = 62,
 	.max_chain_delay = 12,
+	.clks = gpmi_clks_for_mx6,
+	.clks_count = ARRAY_SIZE(gpmi_clks_for_mx6),
+};
+
+static const char * const gpmi_clks_for_mx7d[] = {
+	"gpmi_io", "gpmi_bch_apb",
+};
+
+static const struct gpmi_devdata gpmi_devdata_imx7d = {
+	.type = IS_MX7D,
+	.bch_max_ecc_strength = 62,
+	.max_chain_delay = 12,
+	.clks = gpmi_clks_for_mx7d,
+	.clks_count = ARRAY_SIZE(gpmi_clks_for_mx7d),
 };
 
 static irqreturn_t bch_irq(int irq, void *cookie)
@@ -599,35 +627,14 @@ static int acquire_dma_channels(struct gpmi_nand_data *this)
 	return -EINVAL;
 }
 
-static char *extra_clks_for_mx6q[GPMI_CLK_MAX] = {
-	"gpmi_apb", "gpmi_bch", "gpmi_bch_apb", "per1_bch",
-};
-
 static int gpmi_get_clks(struct gpmi_nand_data *this)
 {
 	struct resources *r = &this->resources;
-	char **extra_clks = NULL;
 	struct clk *clk;
 	int err, i;
 
-	/* The main clock is stored in the first. */
-	r->clock[0] = devm_clk_get(this->dev, "gpmi_io");
-	if (IS_ERR(r->clock[0])) {
-		err = PTR_ERR(r->clock[0]);
-		goto err_clock;
-	}
-
-	/* Get extra clocks */
-	if (GPMI_IS_MX6(this))
-		extra_clks = extra_clks_for_mx6q;
-	if (!extra_clks)
-		return 0;
-
-	for (i = 1; i < GPMI_CLK_MAX; i++) {
-		if (extra_clks[i - 1] == NULL)
-			break;
-
-		clk = devm_clk_get(this->dev, extra_clks[i - 1]);
+	for (i = 0; i < this->devdata->clks_count; i++) {
+		clk = devm_clk_get(this->dev, this->devdata->clks[i]);
 		if (IS_ERR(clk)) {
 			err = PTR_ERR(clk);
 			goto err_clock;
@@ -1929,12 +1936,6 @@ static int gpmi_set_geometry(struct gpmi_nand_data *this)
 	return gpmi_alloc_dma_buffer(this);
 }
 
-static void gpmi_nand_exit(struct gpmi_nand_data *this)
-{
-	nand_release(nand_to_mtd(&this->nand));
-	gpmi_free_dma_buffer(this);
-}
-
 static int gpmi_init_last(struct gpmi_nand_data *this)
 {
 	struct nand_chip *chip = &this->nand;
@@ -2048,18 +2049,20 @@ static int gpmi_nand_init(struct gpmi_nand_data *this)
 
 	ret = nand_boot_init(this);
 	if (ret)
-		goto err_out;
+		goto err_nand_cleanup;
 	ret = chip->scan_bbt(mtd);
 	if (ret)
-		goto err_out;
+		goto err_nand_cleanup;
 
 	ret = mtd_device_register(mtd, NULL, 0);
 	if (ret)
-		goto err_out;
+		goto err_nand_cleanup;
 	return 0;
 
+err_nand_cleanup:
+	nand_cleanup(chip);
 err_out:
-	gpmi_nand_exit(this);
+	gpmi_free_dma_buffer(this);
 	return ret;
 }
 
@@ -2076,6 +2079,9 @@ static const struct of_device_id gpmi_nand_id_table[] = {
 	}, {
 		.compatible = "fsl,imx6sx-gpmi-nand",
 		.data = &gpmi_devdata_imx6sx,
+	}, {
+		.compatible = "fsl,imx7d-gpmi-nand",
+		.data = &gpmi_devdata_imx7d,
 	}, {}
 };
 MODULE_DEVICE_TABLE(of, gpmi_nand_id_table);
@@ -2129,7 +2135,8 @@ static int gpmi_nand_remove(struct platform_device *pdev)
 {
 	struct gpmi_nand_data *this = platform_get_drvdata(pdev);
 
-	gpmi_nand_exit(this);
+	nand_release(nand_to_mtd(&this->nand));
+	gpmi_free_dma_buffer(this);
 	release_resources(this);
 	return 0;
 }

diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
index 4e49a1f..9df0ad6 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h

@@ -123,13 +123,16 @@ enum gpmi_type {
 	IS_MX23,
 	IS_MX28,
 	IS_MX6Q,
-	IS_MX6SX
+	IS_MX6SX,
+	IS_MX7D,
 };
 
 struct gpmi_devdata {
 	enum gpmi_type type;
 	int bch_max_ecc_strength;
 	int max_chain_delay; /* See the async EDO mode */
+	const char * const *clks;
+	const int clks_count;
 };
 
 struct gpmi_nand_data {
@@ -231,7 +234,7 @@ struct gpmi_nfc_hardware_timing {
 };
 
 /**
- * struct timing_threshod - Timing threshold
+ * struct timing_threshold - Timing threshold
  * @max_data_setup_cycles:       The maximum number of data setup cycles that
  *                               can be expressed in the hardware.
  * @internal_data_setup_in_ns:   The time, in ns, that the NFC hardware requires
@@ -253,7 +256,7 @@ struct gpmi_nfc_hardware_timing {
  *                               progress, this is the clock frequency during
  *                               the most recent I/O transaction.
  */
-struct timing_threshod {
+struct timing_threshold {
 	const unsigned int      max_chip_count;
 	const unsigned int      max_data_setup_cycles;
 	const unsigned int      internal_data_setup_in_ns;
@@ -305,6 +308,8 @@ void gpmi_copy_bits(u8 *dst, size_t dst_bit_off,
 #define GPMI_IS_MX28(x)		((x)->devdata->type == IS_MX28)
 #define GPMI_IS_MX6Q(x)		((x)->devdata->type == IS_MX6Q)
 #define GPMI_IS_MX6SX(x)	((x)->devdata->type == IS_MX6SX)
+#define GPMI_IS_MX7D(x)		((x)->devdata->type == IS_MX7D)
 
-#define GPMI_IS_MX6(x)		(GPMI_IS_MX6Q(x) || GPMI_IS_MX6SX(x))
+#define GPMI_IS_MX6(x)		(GPMI_IS_MX6Q(x) || GPMI_IS_MX6SX(x) || \
+				 GPMI_IS_MX7D(x))
 #endif

diff --git a/drivers/mtd/nand/hisi504_nand.c b/drivers/mtd/nand/hisi504_nand.c
index e40364e..530caa8 100644
--- a/drivers/mtd/nand/hisi504_nand.c
+++ b/drivers/mtd/nand/hisi504_nand.c

@@ -764,6 +764,8 @@ static int hisi_nfc_probe(struct platform_device *pdev)
 	chip->write_buf		= hisi_nfc_write_buf;
 	chip->read_buf		= hisi_nfc_read_buf;
 	chip->chip_delay	= HINFC504_CHIP_DELAY;
+	chip->onfi_set_features	= nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features	= nand_onfi_get_set_features_notsupp;
 
 	hisi_nfc_host_init(host);
 

diff --git a/drivers/mtd/nand/jz4780_nand.c b/drivers/mtd/nand/jz4780_nand.c
index a39bb70..8bc835f 100644
--- a/drivers/mtd/nand/jz4780_nand.c
+++ b/drivers/mtd/nand/jz4780_nand.c

@@ -205,7 +205,7 @@ static int jz4780_nand_init_ecc(struct jz4780_nand_chip *nand, struct device *de
 		return -EINVAL;
 	}
 
-	mtd->ooblayout = &nand_ooblayout_lp_ops;
+	mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops);
 
 	return 0;
 }

diff --git a/drivers/mtd/nand/mpc5121_nfc.c b/drivers/mtd/nand/mpc5121_nfc.c
index 6d6eaed..0e86fb6 100644
--- a/drivers/mtd/nand/mpc5121_nfc.c
+++ b/drivers/mtd/nand/mpc5121_nfc.c

@@ -708,6 +708,8 @@ static int mpc5121_nfc_probe(struct platform_device *op)
 	chip->read_buf = mpc5121_nfc_read_buf;
 	chip->write_buf = mpc5121_nfc_write_buf;
 	chip->select_chip = mpc5121_nfc_select_chip;
+	chip->onfi_set_features	= nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features	= nand_onfi_get_set_features_notsupp;
 	chip->bbt_options = NAND_BBT_USE_FLASH;
 	chip->ecc.mode = NAND_ECC_SOFT;
 	chip->ecc.algo = NAND_ECC_HAMMING;

diff --git a/drivers/mtd/nand/mtk_ecc.c b/drivers/mtd/nand/mtk_ecc.c
index dbf2562..6c3a4aa 100644
--- a/drivers/mtd/nand/mtk_ecc.c
+++ b/drivers/mtd/nand/mtk_ecc.c

@@ -28,36 +28,16 @@
 
 #define ECC_IDLE_MASK		BIT(0)
 #define ECC_IRQ_EN		BIT(0)
+#define ECC_PG_IRQ_SEL		BIT(1)
 #define ECC_OP_ENABLE		(1)
 #define ECC_OP_DISABLE		(0)
 
 #define ECC_ENCCON		(0x00)
 #define ECC_ENCCNFG		(0x04)
-#define		ECC_CNFG_4BIT		(0)
-#define		ECC_CNFG_6BIT		(1)
-#define		ECC_CNFG_8BIT		(2)
-#define		ECC_CNFG_10BIT		(3)
-#define		ECC_CNFG_12BIT		(4)
-#define		ECC_CNFG_14BIT		(5)
-#define		ECC_CNFG_16BIT		(6)
-#define		ECC_CNFG_18BIT		(7)
-#define		ECC_CNFG_20BIT		(8)
-#define		ECC_CNFG_22BIT		(9)
-#define		ECC_CNFG_24BIT		(0xa)
-#define		ECC_CNFG_28BIT		(0xb)
-#define		ECC_CNFG_32BIT		(0xc)
-#define		ECC_CNFG_36BIT		(0xd)
-#define		ECC_CNFG_40BIT		(0xe)
-#define		ECC_CNFG_44BIT		(0xf)
-#define		ECC_CNFG_48BIT		(0x10)
-#define		ECC_CNFG_52BIT		(0x11)
-#define		ECC_CNFG_56BIT		(0x12)
-#define		ECC_CNFG_60BIT		(0x13)
 #define		ECC_MODE_SHIFT		(5)
 #define		ECC_MS_SHIFT		(16)
 #define ECC_ENCDIADDR		(0x08)
 #define ECC_ENCIDLE		(0x0C)
-#define ECC_ENCPAR(x)		(0x10 + (x) * sizeof(u32))
 #define ECC_ENCIRQ_EN		(0x80)
 #define ECC_ENCIRQ_STA		(0x84)
 #define ECC_DECCON		(0x100)
@@ -66,7 +46,6 @@
 #define		DEC_CNFG_CORRECT	(0x3 << 12)
 #define ECC_DECIDLE		(0x10C)
 #define ECC_DECENUM0		(0x114)
-#define		ERR_MASK		(0x3f)
 #define ECC_DECDONE		(0x124)
 #define ECC_DECIRQ_EN		(0x200)
 #define ECC_DECIRQ_STA		(0x204)
@@ -78,8 +57,17 @@
 #define ECC_IRQ_REG(op)		((op) == ECC_ENCODE ? \
 					ECC_ENCIRQ_EN : ECC_DECIRQ_EN)
 
+struct mtk_ecc_caps {
+	u32 err_mask;
+	const u8 *ecc_strength;
+	u8 num_ecc_strength;
+	u32 encode_parity_reg0;
+	int pg_irq_sel;
+};
+
 struct mtk_ecc {
 	struct device *dev;
+	const struct mtk_ecc_caps *caps;
 	void __iomem *regs;
 	struct clk *clk;
 
@@ -87,7 +75,18 @@ struct mtk_ecc {
 	struct mutex lock;
 	u32 sectors;
 
-	u8 eccdata[112];
+	u8 *eccdata;
+};
+
+/* ecc strength that each IP supports */
+static const u8 ecc_strength_mt2701[] = {
+	4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, 36,
+	40, 44, 48, 52, 56, 60
+};
+
+static const u8 ecc_strength_mt2712[] = {
+	4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, 36,
+	40, 44, 48, 52, 56, 60, 68, 72, 80
 };
 
 static inline void mtk_ecc_wait_idle(struct mtk_ecc *ecc,
@@ -136,77 +135,24 @@ static irqreturn_t mtk_ecc_irq(int irq, void *id)
 	return IRQ_HANDLED;
 }
 
-static void mtk_ecc_config(struct mtk_ecc *ecc, struct mtk_ecc_config *config)
+static int mtk_ecc_config(struct mtk_ecc *ecc, struct mtk_ecc_config *config)
 {
-	u32 ecc_bit = ECC_CNFG_4BIT, dec_sz, enc_sz;
-	u32 reg;
+	u32 ecc_bit, dec_sz, enc_sz;
+	u32 reg, i;
 
-	switch (config->strength) {
-	case 4:
-		ecc_bit = ECC_CNFG_4BIT;
-		break;
-	case 6:
-		ecc_bit = ECC_CNFG_6BIT;
-		break;
-	case 8:
-		ecc_bit = ECC_CNFG_8BIT;
-		break;
-	case 10:
-		ecc_bit = ECC_CNFG_10BIT;
-		break;
-	case 12:
-		ecc_bit = ECC_CNFG_12BIT;
-		break;
-	case 14:
-		ecc_bit = ECC_CNFG_14BIT;
-		break;
-	case 16:
-		ecc_bit = ECC_CNFG_16BIT;
-		break;
-	case 18:
-		ecc_bit = ECC_CNFG_18BIT;
-		break;
-	case 20:
-		ecc_bit = ECC_CNFG_20BIT;
-		break;
-	case 22:
-		ecc_bit = ECC_CNFG_22BIT;
-		break;
-	case 24:
-		ecc_bit = ECC_CNFG_24BIT;
-		break;
-	case 28:
-		ecc_bit = ECC_CNFG_28BIT;
-		break;
-	case 32:
-		ecc_bit = ECC_CNFG_32BIT;
-		break;
-	case 36:
-		ecc_bit = ECC_CNFG_36BIT;
-		break;
-	case 40:
-		ecc_bit = ECC_CNFG_40BIT;
-		break;
-	case 44:
-		ecc_bit = ECC_CNFG_44BIT;
-		break;
-	case 48:
-		ecc_bit = ECC_CNFG_48BIT;
-		break;
-	case 52:
-		ecc_bit = ECC_CNFG_52BIT;
-		break;
-	case 56:
-		ecc_bit = ECC_CNFG_56BIT;
-		break;
-	case 60:
-		ecc_bit = ECC_CNFG_60BIT;
-		break;
-	default:
-		dev_err(ecc->dev, "invalid strength %d, default to 4 bits\n",
-			config->strength);
+	for (i = 0; i < ecc->caps->num_ecc_strength; i++) {
+		if (ecc->caps->ecc_strength[i] == config->strength)
+			break;
 	}
 
+	if (i == ecc->caps->num_ecc_strength) {
+		dev_err(ecc->dev, "invalid ecc strength %d\n",
+			config->strength);
+		return -EINVAL;
+	}
+
+	ecc_bit = i;
+
 	if (config->op == ECC_ENCODE) {
 		/* configure ECC encoder (in bits) */
 		enc_sz = config->len << 3;
@@ -232,6 +178,8 @@ static void mtk_ecc_config(struct mtk_ecc *ecc, struct mtk_ecc_config *config)
 		if (config->sectors)
 			ecc->sectors = 1 << (config->sectors - 1);
 	}
+
+	return 0;
 }
 
 void mtk_ecc_get_stats(struct mtk_ecc *ecc, struct mtk_ecc_stats *stats,
@@ -247,8 +195,8 @@ void mtk_ecc_get_stats(struct mtk_ecc *ecc, struct mtk_ecc_stats *stats,
 		offset = (i >> 2) << 2;
 		err = readl(ecc->regs + ECC_DECENUM0 + offset);
 		err = err >> ((i % 4) * 8);
-		err &= ERR_MASK;
-		if (err == ERR_MASK) {
+		err &= ecc->caps->err_mask;
+		if (err == ecc->caps->err_mask) {
 			/* uncorrectable errors */
 			stats->failed++;
 			continue;
@@ -313,6 +261,7 @@ EXPORT_SYMBOL(of_mtk_ecc_get);
 int mtk_ecc_enable(struct mtk_ecc *ecc, struct mtk_ecc_config *config)
 {
 	enum mtk_ecc_operation op = config->op;
+	u16 reg_val;
 	int ret;
 
 	ret = mutex_lock_interruptible(&ecc->lock);
@@ -322,11 +271,27 @@ int mtk_ecc_enable(struct mtk_ecc *ecc, struct mtk_ecc_config *config)
 	}
 
 	mtk_ecc_wait_idle(ecc, op);
-	mtk_ecc_config(ecc, config);
-	writew(ECC_OP_ENABLE, ecc->regs + ECC_CTL_REG(op));
 
-	init_completion(&ecc->done);
-	writew(ECC_IRQ_EN, ecc->regs + ECC_IRQ_REG(op));
+	ret = mtk_ecc_config(ecc, config);
+	if (ret) {
+		mutex_unlock(&ecc->lock);
+		return ret;
+	}
+
+	if (config->mode != ECC_NFI_MODE || op != ECC_ENCODE) {
+		init_completion(&ecc->done);
+		reg_val = ECC_IRQ_EN;
+		/*
+		 * For ECC_NFI_MODE, if ecc->caps->pg_irq_sel is 1, this chip
+		 * can only generate one ecc irq during a page read / write.
+		 * If it is 0, one ecc irq is generated for each ecc step.
+		 */
+		if (ecc->caps->pg_irq_sel && config->mode == ECC_NFI_MODE)
+			reg_val |= ECC_PG_IRQ_SEL;
+		writew(reg_val, ecc->regs + ECC_IRQ_REG(op));
+	}
+
+	writew(ECC_OP_ENABLE, ecc->regs + ECC_CTL_REG(op));
 
 	return 0;
 }
@@ -396,7 +361,9 @@ int mtk_ecc_encode(struct mtk_ecc *ecc, struct mtk_ecc_config *config,
 	len = (config->strength * ECC_PARITY_BITS + 7) >> 3;
 
 	/* write the parity bytes generated by the ECC back to temp buffer */
-	__ioread32_copy(ecc->eccdata, ecc->regs + ECC_ENCPAR(0), round_up(len, 4));
+	__ioread32_copy(ecc->eccdata,
+			ecc->regs + ecc->caps->encode_parity_reg0,
+			round_up(len, 4));
 
 	/* copy into possibly unaligned OOB region with actual length */
 	memcpy(data + bytes, ecc->eccdata, len);
@@ -409,37 +376,79 @@ int mtk_ecc_encode(struct mtk_ecc *ecc, struct mtk_ecc_config *config,
 }
 EXPORT_SYMBOL(mtk_ecc_encode);
 
-void mtk_ecc_adjust_strength(u32 *p)
+void mtk_ecc_adjust_strength(struct mtk_ecc *ecc, u32 *p)
 {
-	u32 ecc[] = {4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, 36,
-			40, 44, 48, 52, 56, 60};
+	const u8 *ecc_strength = ecc->caps->ecc_strength;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(ecc); i++) {
-		if (*p <= ecc[i]) {
+	for (i = 0; i < ecc->caps->num_ecc_strength; i++) {
+		if (*p <= ecc_strength[i]) {
 			if (!i)
-				*p = ecc[i];
-			else if (*p != ecc[i])
-				*p = ecc[i - 1];
+				*p = ecc_strength[i];
+			else if (*p != ecc_strength[i])
+				*p = ecc_strength[i - 1];
 			return;
 		}
 	}
 
-	*p = ecc[ARRAY_SIZE(ecc) - 1];
+	*p = ecc_strength[ecc->caps->num_ecc_strength - 1];
 }
 EXPORT_SYMBOL(mtk_ecc_adjust_strength);
 
+static const struct mtk_ecc_caps mtk_ecc_caps_mt2701 = {
+	.err_mask = 0x3f,
+	.ecc_strength = ecc_strength_mt2701,
+	.num_ecc_strength = 20,
+	.encode_parity_reg0 = 0x10,
+	.pg_irq_sel = 0,
+};
+
+static const struct mtk_ecc_caps mtk_ecc_caps_mt2712 = {
+	.err_mask = 0x7f,
+	.ecc_strength = ecc_strength_mt2712,
+	.num_ecc_strength = 23,
+	.encode_parity_reg0 = 0x300,
+	.pg_irq_sel = 1,
+};
+
+static const struct of_device_id mtk_ecc_dt_match[] = {
+	{
+		.compatible = "mediatek,mt2701-ecc",
+		.data = &mtk_ecc_caps_mt2701,
+	}, {
+		.compatible = "mediatek,mt2712-ecc",
+		.data = &mtk_ecc_caps_mt2712,
+	},
+	{},
+};
+
 static int mtk_ecc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct mtk_ecc *ecc;
 	struct resource *res;
+	const struct of_device_id *of_ecc_id = NULL;
+	u32 max_eccdata_size;
 	int irq, ret;
 
 	ecc = devm_kzalloc(dev, sizeof(*ecc), GFP_KERNEL);
 	if (!ecc)
 		return -ENOMEM;
 
+	of_ecc_id = of_match_device(mtk_ecc_dt_match, &pdev->dev);
+	if (!of_ecc_id)
+		return -ENODEV;
+
+	ecc->caps = of_ecc_id->data;
+
+	max_eccdata_size = ecc->caps->num_ecc_strength - 1;
+	max_eccdata_size = ecc->caps->ecc_strength[max_eccdata_size];
+	max_eccdata_size = (max_eccdata_size * ECC_PARITY_BITS + 7) >> 3;
+	max_eccdata_size = round_up(max_eccdata_size, 4);
+	ecc->eccdata = devm_kzalloc(dev, max_eccdata_size, GFP_KERNEL);
+	if (!ecc->eccdata)
+		return -ENOMEM;
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	ecc->regs = devm_ioremap_resource(dev, res);
 	if (IS_ERR(ecc->regs)) {
@@ -500,19 +509,12 @@ static int mtk_ecc_resume(struct device *dev)
 		return ret;
 	}
 
-	mtk_ecc_hw_init(ecc);
-
 	return 0;
 }
 
 static SIMPLE_DEV_PM_OPS(mtk_ecc_pm_ops, mtk_ecc_suspend, mtk_ecc_resume);
 #endif
 
-static const struct of_device_id mtk_ecc_dt_match[] = {
-	{ .compatible = "mediatek,mt2701-ecc" },
-	{},
-};
-
 MODULE_DEVICE_TABLE(of, mtk_ecc_dt_match);
 
 static struct platform_driver mtk_ecc_driver = {

diff --git a/drivers/mtd/nand/mtk_ecc.h b/drivers/mtd/nand/mtk_ecc.h
index cbeba5c..d245c14 100644
--- a/drivers/mtd/nand/mtk_ecc.h
+++ b/drivers/mtd/nand/mtk_ecc.h

@@ -42,7 +42,7 @@ void mtk_ecc_get_stats(struct mtk_ecc *, struct mtk_ecc_stats *, int);
 int mtk_ecc_wait_done(struct mtk_ecc *, enum mtk_ecc_operation);
 int mtk_ecc_enable(struct mtk_ecc *, struct mtk_ecc_config *);
 void mtk_ecc_disable(struct mtk_ecc *);
-void mtk_ecc_adjust_strength(u32 *);
+void mtk_ecc_adjust_strength(struct mtk_ecc *ecc, u32 *p);
 
 struct mtk_ecc *of_mtk_ecc_get(struct device_node *);
 void mtk_ecc_release(struct mtk_ecc *);

diff --git a/drivers/mtd/nand/mtk_nand.c b/drivers/mtd/nand/mtk_nand.c
index 6c517c6..f7ae994 100644
--- a/drivers/mtd/nand/mtk_nand.c
+++ b/drivers/mtd/nand/mtk_nand.c

@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/iopoll.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include "mtk_ecc.h"
 
 /* NAND controller register definition */
@@ -38,23 +39,6 @@
 #define NFI_PAGEFMT		(0x04)
 #define		PAGEFMT_FDM_ECC_SHIFT	(12)
 #define		PAGEFMT_FDM_SHIFT	(8)
-#define		PAGEFMT_SPARE_16	(0)
-#define		PAGEFMT_SPARE_26	(1)
-#define		PAGEFMT_SPARE_27	(2)
-#define		PAGEFMT_SPARE_28	(3)
-#define		PAGEFMT_SPARE_32	(4)
-#define		PAGEFMT_SPARE_36	(5)
-#define		PAGEFMT_SPARE_40	(6)
-#define		PAGEFMT_SPARE_44	(7)
-#define		PAGEFMT_SPARE_48	(8)
-#define		PAGEFMT_SPARE_49	(9)
-#define		PAGEFMT_SPARE_50	(0xa)
-#define		PAGEFMT_SPARE_51	(0xb)
-#define		PAGEFMT_SPARE_52	(0xc)
-#define		PAGEFMT_SPARE_62	(0xd)
-#define		PAGEFMT_SPARE_63	(0xe)
-#define		PAGEFMT_SPARE_64	(0xf)
-#define		PAGEFMT_SPARE_SHIFT	(4)
 #define		PAGEFMT_SEC_SEL_512	BIT(2)
 #define		PAGEFMT_512_2K		(0)
 #define		PAGEFMT_2K_4K		(1)
@@ -115,6 +99,17 @@
 #define MTK_RESET_TIMEOUT	(1000000)
 #define MTK_MAX_SECTOR		(16)
 #define MTK_NAND_MAX_NSELS	(2)
+#define MTK_NFC_MIN_SPARE	(16)
+#define ACCTIMING(tpoecs, tprecs, tc2r, tw2r, twh, twst, trlt) \
+	((tpoecs) << 28 | (tprecs) << 22 | (tc2r) << 16 | \
+	(tw2r) << 12 | (twh) << 8 | (twst) << 4 | (trlt))
+
+struct mtk_nfc_caps {
+	const u8 *spare_size;
+	u8 num_spare_size;
+	u8 pageformat_spare_shift;
+	u8 nfi_clk_div;
+};
 
 struct mtk_nfc_bad_mark_ctl {
 	void (*bm_swap)(struct mtd_info *, u8 *buf, int raw);
@@ -155,6 +150,7 @@ struct mtk_nfc {
 	struct mtk_ecc *ecc;
 
 	struct device *dev;
+	const struct mtk_nfc_caps *caps;
 	void __iomem *regs;
 
 	struct completion done;
@@ -163,6 +159,20 @@ struct mtk_nfc {
 	u8 *buffer;
 };
 
+/*
+ * Supported spare sizes of each IP.
+ * The order must match the spare size bitfield definition of
+ * register NFI_PAGEFMT.
+ */
+static const u8 spare_size_mt2701[] = {
+	16, 26, 27, 28, 32, 36, 40, 44,	48, 49, 50, 51, 52, 62, 63, 64
+};
+
+static const u8 spare_size_mt2712[] = {
+	16, 26, 27, 28, 32, 36, 40, 44, 48, 49, 50, 51, 52, 62, 61, 63, 64, 67,
+	74
+};
+
 static inline struct mtk_nfc_nand_chip *to_mtk_nand(struct nand_chip *nand)
 {
 	return container_of(nand, struct mtk_nfc_nand_chip, nand);
@@ -308,7 +318,7 @@ static int mtk_nfc_hw_runtime_config(struct mtd_info *mtd)
 	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip);
 	struct mtk_nfc *nfc = nand_get_controller_data(chip);
-	u32 fmt, spare;
+	u32 fmt, spare, i;
 
 	if (!mtd->writesize)
 		return 0;
@@ -352,63 +362,21 @@ static int mtk_nfc_hw_runtime_config(struct mtd_info *mtd)
 	if (chip->ecc.size == 1024)
 		spare >>= 1;
 
-	switch (spare) {
-	case 16:
-		fmt |= (PAGEFMT_SPARE_16 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 26:
-		fmt |= (PAGEFMT_SPARE_26 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 27:
-		fmt |= (PAGEFMT_SPARE_27 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 28:
-		fmt |= (PAGEFMT_SPARE_28 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 32:
-		fmt |= (PAGEFMT_SPARE_32 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 36:
-		fmt |= (PAGEFMT_SPARE_36 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 40:
-		fmt |= (PAGEFMT_SPARE_40 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 44:
-		fmt |= (PAGEFMT_SPARE_44 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 48:
-		fmt |= (PAGEFMT_SPARE_48 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 49:
-		fmt |= (PAGEFMT_SPARE_49 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 50:
-		fmt |= (PAGEFMT_SPARE_50 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 51:
-		fmt |= (PAGEFMT_SPARE_51 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 52:
-		fmt |= (PAGEFMT_SPARE_52 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 62:
-		fmt |= (PAGEFMT_SPARE_62 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 63:
-		fmt |= (PAGEFMT_SPARE_63 << PAGEFMT_SPARE_SHIFT);
-		break;
-	case 64:
-		fmt |= (PAGEFMT_SPARE_64 << PAGEFMT_SPARE_SHIFT);
-		break;
-	default:
-		dev_err(nfc->dev, "invalid spare per sector %d\n", spare);
+	for (i = 0; i < nfc->caps->num_spare_size; i++) {
+		if (nfc->caps->spare_size[i] == spare)
+			break;
+	}
+
+	if (i == nfc->caps->num_spare_size) {
+		dev_err(nfc->dev, "invalid spare size %d\n", spare);
 		return -EINVAL;
 	}
 
+	fmt |= i << nfc->caps->pageformat_spare_shift;
+
 	fmt |= mtk_nand->fdm.reg_size << PAGEFMT_FDM_SHIFT;
 	fmt |= mtk_nand->fdm.ecc_size << PAGEFMT_FDM_ECC_SHIFT;
-	nfi_writew(nfc, fmt, NFI_PAGEFMT);
+	nfi_writel(nfc, fmt, NFI_PAGEFMT);
 
 	nfc->ecc_cfg.strength = chip->ecc.strength;
 	nfc->ecc_cfg.len = chip->ecc.size + mtk_nand->fdm.ecc_size;
@@ -531,6 +499,74 @@ static void mtk_nfc_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
 		mtk_nfc_write_byte(mtd, buf[i]);
 }
 
+static int mtk_nfc_setup_data_interface(struct mtd_info *mtd, int csline,
+					const struct nand_data_interface *conf)
+{
+	struct mtk_nfc *nfc = nand_get_controller_data(mtd_to_nand(mtd));
+	const struct nand_sdr_timings *timings;
+	u32 rate, tpoecs, tprecs, tc2r, tw2r, twh, twst, trlt;
+
+	timings = nand_get_sdr_timings(conf);
+	if (IS_ERR(timings))
+		return -ENOTSUPP;
+
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
+		return 0;
+
+	rate = clk_get_rate(nfc->clk.nfi_clk);
+	/* There is a frequency divider in some IPs */
+	rate /= nfc->caps->nfi_clk_div;
+
+	/* turn clock rate into kHz */
+	rate /= 1000;
+
+	tpoecs = max(timings->tALH_min, timings->tCLH_min) / 1000;
+	tpoecs = DIV_ROUND_UP(tpoecs * rate, 1000000);
+	tpoecs &= 0xf;
+
+	tprecs = max(timings->tCLS_min, timings->tALS_min) / 1000;
+	tprecs = DIV_ROUND_UP(tprecs * rate, 1000000);
+	tprecs &= 0x3f;
+
+	/* the SDR interface has no tCR (CE# low to RE# low) timing */
+	tc2r = 0;
+
+	tw2r = timings->tWHR_min / 1000;
+	tw2r = DIV_ROUND_UP(tw2r * rate, 1000000);
+	tw2r = DIV_ROUND_UP(tw2r - 1, 2);
+	tw2r &= 0xf;
+
+	twh = max(timings->tREH_min, timings->tWH_min) / 1000;
+	twh = DIV_ROUND_UP(twh * rate, 1000000) - 1;
+	twh &= 0xf;
+
+	twst = timings->tWP_min / 1000;
+	twst = DIV_ROUND_UP(twst * rate, 1000000) - 1;
+	twst &= 0xf;
+
+	trlt = max(timings->tREA_max, timings->tRP_min) / 1000;
+	trlt = DIV_ROUND_UP(trlt * rate, 1000000) - 1;
+	trlt &= 0xf;
+
+	/*
+	 * ACCON: access timing control register
+	 * -------------------------------------
+	 * 31:28: tpoecs, minimum required time for CS post pulling down after
+	 *        accessing the device
+	 * 27:22: tprecs, minimum required time for CS pre pulling down before
+	 *        accessing the device
+	 * 21:16: tc2r, minimum required time from NCEB low to NREB low
+	 * 15:12: tw2r, minimum required time from NWEB high to NREB low.
+	 * 11:08: twh, write enable hold time
+	 * 07:04: twst, write wait states
+	 * 03:00: trlt, read wait states
+	 */
+	trlt = ACCTIMING(tpoecs, tprecs, tc2r, tw2r, twh, twst, trlt);
+	nfi_writel(nfc, trlt, NFI_ACCCON);
+
+	return 0;
+}
+
 static int mtk_nfc_sector_encode(struct nand_chip *chip, u8 *data)
 {
 	struct mtk_nfc *nfc = nand_get_controller_data(chip);
@@ -988,28 +1024,13 @@ static int mtk_nfc_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip,
 static inline void mtk_nfc_hw_init(struct mtk_nfc *nfc)
 {
 	/*
-	 * ACCON: access timing control register
-	 * -------------------------------------
-	 * 31:28: minimum required time for CS post pulling down after accessing
-	 *	the device
-	 * 27:22: minimum required time for CS pre pulling down before accessing
-	 *	the device
-	 * 21:16: minimum required time from NCEB low to NREB low
-	 * 15:12: minimum required time from NWEB high to NREB low.
-	 * 11:08: write enable hold time
-	 * 07:04: write wait states
-	 * 03:00: read wait states
-	 */
-	nfi_writel(nfc, 0x10804211, NFI_ACCCON);
-
-	/*
 	 * CNRNB: nand ready/busy register
 	 * -------------------------------
 	 * 7:4: timeout register for polling the NAND busy/ready signal
 	 * 0  : poll the status of the busy/ready signal after [7:4]*16 cycles.
 	 */
 	nfi_writew(nfc, 0xf1, NFI_CNRNB);
-	nfi_writew(nfc, PAGEFMT_8K_16K, NFI_PAGEFMT);
+	nfi_writel(nfc, PAGEFMT_8K_16K, NFI_PAGEFMT);
 
 	mtk_nfc_hw_reset(nfc);
 
@@ -1131,12 +1152,12 @@ static void mtk_nfc_set_bad_mark_ctl(struct mtk_nfc_bad_mark_ctl *bm_ctl,
 	}
 }
 
-static void mtk_nfc_set_spare_per_sector(u32 *sps, struct mtd_info *mtd)
+static int mtk_nfc_set_spare_per_sector(u32 *sps, struct mtd_info *mtd)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
-	u32 spare[] = {16, 26, 27, 28, 32, 36, 40, 44,
-			48, 49, 50, 51, 52, 62, 63, 64};
-	u32 eccsteps, i;
+	struct mtk_nfc *nfc = nand_get_controller_data(nand);
+	const u8 *spare = nfc->caps->spare_size;
+	u32 eccsteps, i, closest_spare = 0;
 
 	eccsteps = mtd->writesize / nand->ecc.size;
 	*sps = mtd->oobsize / eccsteps;
@@ -1144,28 +1165,31 @@ static void mtk_nfc_set_spare_per_sector(u32 *sps, struct mtd_info *mtd)
 	if (nand->ecc.size == 1024)
 		*sps >>= 1;
 
-	for (i = 0; i < ARRAY_SIZE(spare); i++) {
-		if (*sps <= spare[i]) {
-			if (!i)
-				*sps = spare[i];
-			else if (*sps != spare[i])
-				*sps = spare[i - 1];
-			break;
+	if (*sps < MTK_NFC_MIN_SPARE)
+		return -EINVAL;
+
+	for (i = 0; i < nfc->caps->num_spare_size; i++) {
+		if (*sps >= spare[i] && spare[i] >= spare[closest_spare]) {
+			closest_spare = i;
+			if (*sps == spare[i])
+				break;
 		}
 	}
 
-	if (i >= ARRAY_SIZE(spare))
-		*sps = spare[ARRAY_SIZE(spare) - 1];
+	*sps = spare[closest_spare];
 
 	if (nand->ecc.size == 1024)
 		*sps <<= 1;
+
+	return 0;
 }
 
 static int mtk_nfc_ecc_init(struct device *dev, struct mtd_info *mtd)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
+	struct mtk_nfc *nfc = nand_get_controller_data(nand);
 	u32 spare;
-	int free;
+	int free, ret;
 
 	/* support only ecc hw mode */
 	if (nand->ecc.mode != NAND_ECC_HW) {
@@ -1194,7 +1218,9 @@ static int mtk_nfc_ecc_init(struct device *dev, struct mtd_info *mtd)
 			nand->ecc.size = 1024;
 		}
 
-		mtk_nfc_set_spare_per_sector(&spare, mtd);
+		ret = mtk_nfc_set_spare_per_sector(&spare, mtd);
+		if (ret)
+			return ret;
 
 		/* calculate oob bytes except ecc parity data */
 		free = ((nand->ecc.strength * ECC_PARITY_BITS) + 7) >> 3;
@@ -1214,7 +1240,7 @@ static int mtk_nfc_ecc_init(struct device *dev, struct mtd_info *mtd)
 		}
 	}
 
-	mtk_ecc_adjust_strength(&nand->ecc.strength);
+	mtk_ecc_adjust_strength(nfc->ecc, &nand->ecc.strength);
 
 	dev_info(dev, "eccsize %d eccstrength %d\n",
 		 nand->ecc.size, nand->ecc.strength);
@@ -1271,6 +1297,7 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc,
 	nand->read_byte = mtk_nfc_read_byte;
 	nand->read_buf = mtk_nfc_read_buf;
 	nand->cmd_ctrl = mtk_nfc_cmd_ctrl;
+	nand->setup_data_interface = mtk_nfc_setup_data_interface;
 
 	/* set default mode in case dt entry is missing */
 	nand->ecc.mode = NAND_ECC_HW;
@@ -1312,7 +1339,10 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc,
 		return -EINVAL;
 	}
 
-	mtk_nfc_set_spare_per_sector(&chip->spare_per_sector, mtd);
+	ret = mtk_nfc_set_spare_per_sector(&chip->spare_per_sector, mtd);
+	if (ret)
+		return ret;
+
 	mtk_nfc_set_fdm(&chip->fdm, mtd);
 	mtk_nfc_set_bad_mark_ctl(&chip->bad_mark, mtd);
 
@@ -1354,12 +1384,39 @@ static int mtk_nfc_nand_chips_init(struct device *dev, struct mtk_nfc *nfc)
 	return 0;
 }
 
+static const struct mtk_nfc_caps mtk_nfc_caps_mt2701 = {
+	.spare_size = spare_size_mt2701,
+	.num_spare_size = 16,
+	.pageformat_spare_shift = 4,
+	.nfi_clk_div = 1,
+};
+
+static const struct mtk_nfc_caps mtk_nfc_caps_mt2712 = {
+	.spare_size = spare_size_mt2712,
+	.num_spare_size = 19,
+	.pageformat_spare_shift = 16,
+	.nfi_clk_div = 2,
+};
+
+static const struct of_device_id mtk_nfc_id_table[] = {
+	{
+		.compatible = "mediatek,mt2701-nfc",
+		.data = &mtk_nfc_caps_mt2701,
+	}, {
+		.compatible = "mediatek,mt2712-nfc",
+		.data = &mtk_nfc_caps_mt2712,
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(of, mtk_nfc_id_table);
+
 static int mtk_nfc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct device_node *np = dev->of_node;
 	struct mtk_nfc *nfc;
 	struct resource *res;
+	const struct of_device_id *of_nfc_id = NULL;
 	int ret, irq;
 
 	nfc = devm_kzalloc(dev, sizeof(*nfc), GFP_KERNEL);
@@ -1423,6 +1480,14 @@ static int mtk_nfc_probe(struct platform_device *pdev)
 		goto clk_disable;
 	}
 
+	of_nfc_id = of_match_device(mtk_nfc_id_table, &pdev->dev);
+	if (!of_nfc_id) {
+		ret = -ENODEV;
+		goto clk_disable;
+	}
+
+	nfc->caps = of_nfc_id->data;
+
 	platform_set_drvdata(pdev, nfc);
 
 	ret = mtk_nfc_nand_chips_init(dev, nfc);
@@ -1485,8 +1550,6 @@ static int mtk_nfc_resume(struct device *dev)
 	if (ret)
 		return ret;
 
-	mtk_nfc_hw_init(nfc);
-
 	/* reset NAND chip if VCC was powered off */
 	list_for_each_entry(chip, &nfc->chips, node) {
 		nand = &chip->nand;
@@ -1503,12 +1566,6 @@ static int mtk_nfc_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(mtk_nfc_pm_ops, mtk_nfc_suspend, mtk_nfc_resume);
 #endif
 
-static const struct of_device_id mtk_nfc_id_table[] = {
-	{ .compatible = "mediatek,mt2701-nfc" },
-	{}
-};
-MODULE_DEVICE_TABLE(of, mtk_nfc_id_table);
-
 static struct platform_driver mtk_nfc_driver = {
 	.probe  = mtk_nfc_probe,
 	.remove = mtk_nfc_remove,

diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index 61ca020..a764d5c 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c

@@ -152,9 +152,8 @@ struct mxc_nand_devtype_data {
 	void (*select_chip)(struct mtd_info *mtd, int chip);
 	int (*correct_data)(struct mtd_info *mtd, u_char *dat,
 			u_char *read_ecc, u_char *calc_ecc);
-	int (*setup_data_interface)(struct mtd_info *mtd,
-				    const struct nand_data_interface *conf,
-				    bool check_only);
+	int (*setup_data_interface)(struct mtd_info *mtd, int csline,
+				    const struct nand_data_interface *conf);
 
 	/*
 	 * On i.MX21 the CONFIG2:INT bit cannot be read if interrupts are masked
@@ -1015,9 +1014,8 @@ static void preset_v1(struct mtd_info *mtd)
 	writew(0x4, NFC_V1_V2_WRPROT);
 }
 
-static int mxc_nand_v2_setup_data_interface(struct mtd_info *mtd,
-					const struct nand_data_interface *conf,
-					bool check_only)
+static int mxc_nand_v2_setup_data_interface(struct mtd_info *mtd, int csline,
+					const struct nand_data_interface *conf)
 {
 	struct nand_chip *nand_chip = mtd_to_nand(mtd);
 	struct mxc_nand_host *host = nand_get_controller_data(nand_chip);
@@ -1075,7 +1073,7 @@ static int mxc_nand_v2_setup_data_interface(struct mtd_info *mtd,
 		return -EINVAL;
 	}
 
-	if (check_only)
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
 		return 0;
 
 	ret = clk_set_rate(host->clk, rate);

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index bf8486c..5fa5ddc 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c

@@ -755,6 +755,16 @@ static void nand_command(struct mtd_info *mtd, unsigned int command,
 		return;
 
 		/* This applies to read commands */
+	case NAND_CMD_READ0:
+		/*
+		 * READ0 is sometimes used to exit GET STATUS mode. When this
+		 * is the case no address cycles are requested, and we can use
+		 * this information to detect that we should not wait for the
+		 * device to be ready.
+		 */
+		if (column == -1 && page_addr == -1)
+			return;
+
 	default:
 		/*
 		 * If we don't have access to the busy pin, we apply the given
@@ -889,6 +899,15 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
 		return;
 
 	case NAND_CMD_READ0:
+		/*
+		 * READ0 is sometimes used to exit GET STATUS mode. When this
+		 * is the case no address cycles are requested, and we can use
+		 * this information to detect that READSTART should not be
+		 * issued.
+		 */
+		if (column == -1 && page_addr == -1)
+			return;
+
 		chip->cmd_ctrl(mtd, NAND_CMD_READSTART,
 			       NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
 		chip->cmd_ctrl(mtd, NAND_CMD_NONE,
@@ -1044,12 +1063,13 @@ static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip)
 /**
  * nand_reset_data_interface - Reset data interface and timings
  * @chip: The NAND chip
+ * @chipnr: Internal die id
  *
  * Reset the Data interface and timings to ONFI mode 0.
  *
  * Returns 0 for success or negative error code otherwise.
  */
-static int nand_reset_data_interface(struct nand_chip *chip)
+static int nand_reset_data_interface(struct nand_chip *chip, int chipnr)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	const struct nand_data_interface *conf;
@@ -1073,7 +1093,7 @@ static int nand_reset_data_interface(struct nand_chip *chip)
 	 */
 
 	conf = nand_get_default_data_interface();
-	ret = chip->setup_data_interface(mtd, conf, false);
+	ret = chip->setup_data_interface(mtd, chipnr, conf);
 	if (ret)
 		pr_err("Failed to configure data interface to SDR timing mode 0\n");
 
@@ -1083,6 +1103,7 @@ static int nand_reset_data_interface(struct nand_chip *chip)
 /**
  * nand_setup_data_interface - Setup the best data interface and timings
  * @chip: The NAND chip
+ * @chipnr: Internal die id
  *
  * Find and configure the best data interface and NAND timings supported by
  * the chip and the driver.
@@ -1092,7 +1113,7 @@ static int nand_reset_data_interface(struct nand_chip *chip)
  *
  * Returns 0 for success or negative error code otherwise.
  */
-static int nand_setup_data_interface(struct nand_chip *chip)
+static int nand_setup_data_interface(struct nand_chip *chip, int chipnr)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
@@ -1116,7 +1137,7 @@ static int nand_setup_data_interface(struct nand_chip *chip)
 			goto err;
 	}
 
-	ret = chip->setup_data_interface(mtd, chip->data_interface, false);
+	ret = chip->setup_data_interface(mtd, chipnr, chip->data_interface);
 err:
 	return ret;
 }
@@ -1167,8 +1188,10 @@ static int nand_init_data_interface(struct nand_chip *chip)
 		if (ret)
 			continue;
 
-		ret = chip->setup_data_interface(mtd, chip->data_interface,
-						 true);
+		/* Pass NAND_DATA_IFACE_CHECK_ONLY to only check if supported */
+		ret = chip->setup_data_interface(mtd,
+						 NAND_DATA_IFACE_CHECK_ONLY,
+						 chip->data_interface);
 		if (!ret) {
 			chip->onfi_timing_mode_default = mode;
 			break;
@@ -1195,7 +1218,7 @@ int nand_reset(struct nand_chip *chip, int chipnr)
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 
-	ret = nand_reset_data_interface(chip);
+	ret = nand_reset_data_interface(chip, chipnr);
 	if (ret)
 		return ret;
 
@@ -1208,7 +1231,7 @@ int nand_reset(struct nand_chip *chip, int chipnr)
 	chip->select_chip(mtd, -1);
 
 	chip->select_chip(mtd, chipnr);
-	ret = nand_setup_data_interface(chip);
+	ret = nand_setup_data_interface(chip, chipnr);
 	chip->select_chip(mtd, -1);
 	if (ret)
 		return ret;
@@ -1424,7 +1447,10 @@ static int nand_check_erased_buf(void *buf, int len, int bitflips_threshold)
 
 	for (; len >= sizeof(long);
 	     len -= sizeof(long), bitmap += sizeof(long)) {
-		weight = hweight_long(*((unsigned long *)bitmap));
+		unsigned long d = *((unsigned long *)bitmap);
+		if (d == ~0UL)
+			continue;
+		weight = hweight_long(d);
 		bitflips += BITS_PER_LONG - weight;
 		if (unlikely(bitflips > bitflips_threshold))
 			return -EBADMSG;
@@ -1527,14 +1553,15 @@ EXPORT_SYMBOL(nand_check_erased_ecc_chunk);
  *
  * Not for syndrome calculating ECC controllers, which use a special oob layout.
  */
-static int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-			      uint8_t *buf, int oob_required, int page)
+int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+		       uint8_t *buf, int oob_required, int page)
 {
 	chip->read_buf(mtd, buf, mtd->writesize);
 	if (oob_required)
 		chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
 	return 0;
 }
+EXPORT_SYMBOL(nand_read_page_raw);
 
 /**
  * nand_read_page_raw_syndrome - [INTERN] read raw page data without ecc
@@ -2472,8 +2499,8 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,
  *
  * Not for syndrome calculating ECC controllers, which use a special oob layout.
  */
-static int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-			       const uint8_t *buf, int oob_required, int page)
+int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+			const uint8_t *buf, int oob_required, int page)
 {
 	chip->write_buf(mtd, buf, mtd->writesize);
 	if (oob_required)
@@ -2481,6 +2508,7 @@ static int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 
 	return 0;
 }
+EXPORT_SYMBOL(nand_write_page_raw);
 
 /**
  * nand_write_page_raw_syndrome - [INTERN] raw page write function
@@ -2718,7 +2746,7 @@ static int nand_write_page_syndrome(struct mtd_info *mtd,
  */
 static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 		uint32_t offset, int data_len, const uint8_t *buf,
-		int oob_required, int page, int cached, int raw)
+		int oob_required, int page, int raw)
 {
 	int status, subpage;
 
@@ -2744,30 +2772,12 @@ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	if (status < 0)
 		return status;
 
-	/*
-	 * Cached progamming disabled for now. Not sure if it's worth the
-	 * trouble. The speed gain is not very impressive. (2.3->2.6Mib/s).
-	 */
-	cached = 0;
+	if (nand_standard_page_accessors(&chip->ecc)) {
+		chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
 
-	if (!cached || !NAND_HAS_CACHEPROG(chip)) {
-
-		if (nand_standard_page_accessors(&chip->ecc))
-			chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
 		status = chip->waitfunc(mtd, chip);
-		/*
-		 * See if operation failed and additional status checks are
-		 * available.
-		 */
-		if ((status & NAND_STATUS_FAIL) && (chip->errstat))
-			status = chip->errstat(mtd, chip, FL_WRITING, status,
-					       page);
-
 		if (status & NAND_STATUS_FAIL)
 			return -EIO;
-	} else {
-		chip->cmdfunc(mtd, NAND_CMD_CACHEDPROG, -1, -1);
-		status = chip->waitfunc(mtd, chip);
 	}
 
 	return 0;
@@ -2875,7 +2885,6 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 
 	while (1) {
 		int bytes = mtd->writesize;
-		int cached = writelen > bytes && page != blockmask;
 		uint8_t *wbuf = buf;
 		int use_bufpoi;
 		int part_pagewr = (column || writelen < mtd->writesize);
@@ -2893,7 +2902,6 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 		if (use_bufpoi) {
 			pr_debug("%s: using write bounce buffer for buf@%p\n",
 					 __func__, buf);
-			cached = 0;
 			if (part_pagewr)
 				bytes = min_t(int, bytes - column, writelen);
 			chip->pagebuf = -1;
@@ -2912,7 +2920,7 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 		}
 
 		ret = nand_write_page(mtd, chip, column, bytes, wbuf,
-				      oob_required, page, cached,
+				      oob_required, page,
 				      (ops->mode == MTD_OPS_RAW));
 		if (ret)
 			break;
@@ -3228,14 +3236,6 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 
 		status = chip->erase(mtd, page & chip->pagemask);
 
-		/*
-		 * See if operation failed and additional status checks are
-		 * available
-		 */
-		if ((status & NAND_STATUS_FAIL) && (chip->errstat))
-			status = chip->errstat(mtd, chip, FL_ERASING,
-					       status, page);
-
 		/* See if block erase succeeded */
 		if (status & NAND_STATUS_FAIL) {
 			pr_debug("%s: failed erase, page 0x%08x\n",
@@ -3422,6 +3422,25 @@ static int nand_onfi_get_features(struct mtd_info *mtd, struct nand_chip *chip,
 }
 
 /**
+ * nand_onfi_get_set_features_notsupp - set/get features stub returning
+ *					-ENOTSUPP
+ * @mtd: MTD device structure
+ * @chip: nand chip info structure
+ * @addr: feature address.
+ * @subfeature_param: the subfeature parameters, a four bytes array.
+ *
+ * Should be used by NAND controller drivers that do not support the SET/GET
+ * FEATURES operations.
+ */
+int nand_onfi_get_set_features_notsupp(struct mtd_info *mtd,
+				       struct nand_chip *chip, int addr,
+				       u8 *subfeature_param)
+{
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL(nand_onfi_get_set_features_notsupp);
+
+/**
  * nand_suspend - [MTD Interface] Suspend the NAND flash
  * @mtd: MTD device structure
  */
@@ -4180,6 +4199,7 @@ static const char * const nand_ecc_modes[] = {
 	[NAND_ECC_HW]		= "hw",
 	[NAND_ECC_HW_SYNDROME]	= "hw_syndrome",
 	[NAND_ECC_HW_OOB_FIRST]	= "hw_oob_first",
+	[NAND_ECC_ON_DIE]	= "on-die",
 };
 
 static int of_get_nand_ecc_mode(struct device_node *np)
@@ -4374,7 +4394,7 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 	 * For the other dies, nand_reset() will automatically switch to the
 	 * best mode for us.
 	 */
-	ret = nand_setup_data_interface(chip);
+	ret = nand_setup_data_interface(chip, 0);
 	if (ret)
 		goto err_nand_init;
 
@@ -4512,6 +4532,226 @@ static int nand_set_ecc_soft_ops(struct mtd_info *mtd)
 	}
 }
 
+/**
+ * nand_check_ecc_caps - check the sanity of preset ECC settings
+ * @chip: nand chip info structure
+ * @caps: ECC caps info structure
+ * @oobavail: OOB size that the ECC engine can use
+ *
+ * When ECC step size and strength are already set, check if they are supported
+ * by the controller and the calculated ECC bytes fit within the chip's OOB.
+ * On success, chip->ecc.bytes is set and 0 is returned.
+ */
+int nand_check_ecc_caps(struct nand_chip *chip,
+			const struct nand_ecc_caps *caps, int oobavail)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	const struct nand_ecc_step_info *stepinfo;
+	int preset_step = chip->ecc.size;
+	int preset_strength = chip->ecc.strength;
+	int nsteps, ecc_bytes;
+	int i, j;
+
+	if (WARN_ON(oobavail < 0))
+		return -EINVAL;
+
+	if (!preset_step || !preset_strength)
+		return -ENODATA;
+
+	nsteps = mtd->writesize / preset_step;
+
+	for (i = 0; i < caps->nstepinfos; i++) {
+		stepinfo = &caps->stepinfos[i];
+
+		if (stepinfo->stepsize != preset_step)
+			continue;
+
+		for (j = 0; j < stepinfo->nstrengths; j++) {
+			if (stepinfo->strengths[j] != preset_strength)
+				continue;
+
+			ecc_bytes = caps->calc_ecc_bytes(preset_step,
+							 preset_strength);
+			if (WARN_ON_ONCE(ecc_bytes < 0))
+				return ecc_bytes;
+
+			if (ecc_bytes * nsteps > oobavail) {
+				pr_err("ECC (step, strength) = (%d, %d) does not fit in OOB\n",
+				       preset_step, preset_strength);
+				return -ENOSPC;
+			}
+
+			chip->ecc.bytes = ecc_bytes;
+
+			return 0;
+		}
+	}
+
+	pr_err("ECC (step, strength) = (%d, %d) not supported on this controller\n",
+	       preset_step, preset_strength);
+
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL_GPL(nand_check_ecc_caps);
+
+/**
+ * nand_match_ecc_req - meet the chip's requirement with least ECC bytes
+ * @chip: nand chip info structure
+ * @caps: ECC engine caps info structure
+ * @oobavail: OOB size that the ECC engine can use
+ *
+ * If a chip's ECC requirement is provided, try to meet it with the least
+ * number of ECC bytes (i.e. with the largest number of OOB-free bytes).
+ * On success, the chosen ECC settings are set.
+ */
+int nand_match_ecc_req(struct nand_chip *chip,
+		       const struct nand_ecc_caps *caps, int oobavail)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	const struct nand_ecc_step_info *stepinfo;
+	int req_step = chip->ecc_step_ds;
+	int req_strength = chip->ecc_strength_ds;
+	int req_corr, step_size, strength, nsteps, ecc_bytes, ecc_bytes_total;
+	int best_step, best_strength, best_ecc_bytes;
+	int best_ecc_bytes_total = INT_MAX;
+	int i, j;
+
+	if (WARN_ON(oobavail < 0))
+		return -EINVAL;
+
+	/* No information provided by the NAND chip */
+	if (!req_step || !req_strength)
+		return -ENOTSUPP;
+
+	/* number of correctable bits the chip requires in a page */
+	req_corr = mtd->writesize / req_step * req_strength;
+
+	for (i = 0; i < caps->nstepinfos; i++) {
+		stepinfo = &caps->stepinfos[i];
+		step_size = stepinfo->stepsize;
+
+		for (j = 0; j < stepinfo->nstrengths; j++) {
+			strength = stepinfo->strengths[j];
+
+			/*
+			 * If both step size and strength are smaller than the
+			 * chip's requirement, it is not easy to compare the
+			 * resulting reliability.
+			 */
+			if (step_size < req_step && strength < req_strength)
+				continue;
+
+			if (mtd->writesize % step_size)
+				continue;
+
+			nsteps = mtd->writesize / step_size;
+
+			ecc_bytes = caps->calc_ecc_bytes(step_size, strength);
+			if (WARN_ON_ONCE(ecc_bytes < 0))
+				continue;
+			ecc_bytes_total = ecc_bytes * nsteps;
+
+			if (ecc_bytes_total > oobavail ||
+			    strength * nsteps < req_corr)
+				continue;
+
+			/*
+			 * We assume the best is to meet the chip's requirement
+			 * with the least number of ECC bytes.
+			 */
+			if (ecc_bytes_total < best_ecc_bytes_total) {
+				best_ecc_bytes_total = ecc_bytes_total;
+				best_step = step_size;
+				best_strength = strength;
+				best_ecc_bytes = ecc_bytes;
+			}
+		}
+	}
+
+	if (best_ecc_bytes_total == INT_MAX)
+		return -ENOTSUPP;
+
+	chip->ecc.size = best_step;
+	chip->ecc.strength = best_strength;
+	chip->ecc.bytes = best_ecc_bytes;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nand_match_ecc_req);
+
+/**
+ * nand_maximize_ecc - choose the max ECC strength available
+ * @chip: nand chip info structure
+ * @caps: ECC engine caps info structure
+ * @oobavail: OOB size that the ECC engine can use
+ *
+ * Choose the max ECC strength that is supported on the controller, and can fit
+ * within the chip's OOB.  On success, the chosen ECC settings are set.
+ */
+int nand_maximize_ecc(struct nand_chip *chip,
+		      const struct nand_ecc_caps *caps, int oobavail)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	const struct nand_ecc_step_info *stepinfo;
+	int step_size, strength, nsteps, ecc_bytes, corr;
+	int best_corr = 0;
+	int best_step = 0;
+	int best_strength, best_ecc_bytes;
+	int i, j;
+
+	if (WARN_ON(oobavail < 0))
+		return -EINVAL;
+
+	for (i = 0; i < caps->nstepinfos; i++) {
+		stepinfo = &caps->stepinfos[i];
+		step_size = stepinfo->stepsize;
+
+		/* If chip->ecc.size is already set, respect it */
+		if (chip->ecc.size && step_size != chip->ecc.size)
+			continue;
+
+		for (j = 0; j < stepinfo->nstrengths; j++) {
+			strength = stepinfo->strengths[j];
+
+			if (mtd->writesize % step_size)
+				continue;
+
+			nsteps = mtd->writesize / step_size;
+
+			ecc_bytes = caps->calc_ecc_bytes(step_size, strength);
+			if (WARN_ON_ONCE(ecc_bytes < 0))
+				continue;
+
+			if (ecc_bytes * nsteps > oobavail)
+				continue;
+
+			corr = strength * nsteps;
+
+			/*
+			 * If the number of correctable bits is the same,
+			 * bigger step_size has more reliability.
+			 */
+			if (corr > best_corr ||
+			    (corr == best_corr && step_size > best_step)) {
+				best_corr = corr;
+				best_step = step_size;
+				best_strength = strength;
+				best_ecc_bytes = ecc_bytes;
+			}
+		}
+	}
+
+	if (!best_corr)
+		return -ENOTSUPP;
+
+	chip->ecc.size = best_step;
+	chip->ecc.strength = best_strength;
+	chip->ecc.bytes = best_ecc_bytes;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nand_maximize_ecc);
+
 /*
  * Check if the chip configuration meet the datasheet requirements.
 
@@ -4733,6 +4973,18 @@ int nand_scan_tail(struct mtd_info *mtd)
 		}
 		break;
 
+	case NAND_ECC_ON_DIE:
+		if (!ecc->read_page || !ecc->write_page) {
+			WARN(1, "No ECC functions supplied; on-die ECC not possible\n");
+			ret = -EINVAL;
+			goto err_free;
+		}
+		if (!ecc->read_oob)
+			ecc->read_oob = nand_read_oob_std;
+		if (!ecc->write_oob)
+			ecc->write_oob = nand_write_oob_std;
+		break;
+
 	case NAND_ECC_NONE:
 		pr_warn("NAND_ECC_NONE selected by board driver. This is not recommended!\n");
 		ecc->read_page = nand_read_page_raw;
@@ -4773,6 +5025,11 @@ int nand_scan_tail(struct mtd_info *mtd)
 		goto err_free;
 	}
 	ecc->total = ecc->steps * ecc->bytes;
+	if (ecc->total > mtd->oobsize) {
+		WARN(1, "Total number of ECC bytes exceeded oobsize\n");
+		ret = -EINVAL;
+		goto err_free;
+	}
 
 	/*
 	 * The number of bytes available for a client to place data into

diff --git a/drivers/mtd/nand/nand_micron.c b/drivers/mtd/nand/nand_micron.c
index 8770110..c30ab60 100644
--- a/drivers/mtd/nand/nand_micron.c
+++ b/drivers/mtd/nand/nand_micron.c

@@ -17,6 +17,12 @@
 
 #include <linux/mtd/nand.h>
 
+/*
+ * Special Micron status bit that indicates when the block has been
+ * corrected by on-die ECC and should be rewritten
+ */
+#define NAND_STATUS_WRITE_RECOMMENDED	BIT(3)
+
 struct nand_onfi_vendor_micron {
 	u8 two_plane_read;
 	u8 read_cache;
@@ -66,9 +72,197 @@ static int micron_nand_onfi_init(struct nand_chip *chip)
 	return 0;
 }
 
+static int micron_nand_on_die_ooblayout_ecc(struct mtd_info *mtd, int section,
+					    struct mtd_oob_region *oobregion)
+{
+	if (section >= 4)
+		return -ERANGE;
+
+	oobregion->offset = (section * 16) + 8;
+	oobregion->length = 8;
+
+	return 0;
+}
+
+static int micron_nand_on_die_ooblayout_free(struct mtd_info *mtd, int section,
+					     struct mtd_oob_region *oobregion)
+{
+	if (section >= 4)
+		return -ERANGE;
+
+	oobregion->offset = (section * 16) + 2;
+	oobregion->length = 6;
+
+	return 0;
+}
+
+static const struct mtd_ooblayout_ops micron_nand_on_die_ooblayout_ops = {
+	.ecc = micron_nand_on_die_ooblayout_ecc,
+	.free = micron_nand_on_die_ooblayout_free,
+};
+
+static int micron_nand_on_die_ecc_setup(struct nand_chip *chip, bool enable)
+{
+	u8 feature[ONFI_SUBFEATURE_PARAM_LEN] = { 0, };
+
+	if (enable)
+		feature[0] |= ONFI_FEATURE_ON_DIE_ECC_EN;
+
+	return chip->onfi_set_features(nand_to_mtd(chip), chip,
+				       ONFI_FEATURE_ON_DIE_ECC, feature);
+}
+
+static int
+micron_nand_read_page_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip,
+				 uint8_t *buf, int oob_required,
+				 int page)
+{
+	int status = micron_nand_on_die_ecc_setup(chip, true);
+	int max_bitflips = 0;
+
+	if (status)	/* could not enable on-die ECC: report the error */
+		return status;
+	chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
+	chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1);
+	status = chip->read_byte(mtd);
+	if (status & NAND_STATUS_FAIL)
+		mtd->ecc_stats.failed++;
+	/*
+	 * The internal ECC doesn't tell us the number of bitflips
+	 * that have been corrected, but tells us if it recommends to
+	 * rewrite the block. If it's the case, then we pretend we had
+	 * a number of bitflips equal to the ECC strength, which will
+	 * hint the NAND core to rewrite the block.
+	 */
+	else if (status & NAND_STATUS_WRITE_RECOMMENDED)
+		max_bitflips = chip->ecc.strength;
+
+	chip->cmdfunc(mtd, NAND_CMD_READ0, -1, -1);
+
+	nand_read_page_raw(mtd, chip, buf, oob_required, page);
+
+	micron_nand_on_die_ecc_setup(chip, false);
+
+	return max_bitflips;
+}
+
+static int
+micron_nand_write_page_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip,
+				  const uint8_t *buf, int oob_required,
+				  int page)
+{
+	int status = micron_nand_on_die_ecc_setup(chip, true);
+
+	if (status)	/* could not enable on-die ECC: report the error */
+		return status;
+	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
+	nand_write_page_raw(mtd, chip, buf, oob_required, page);
+	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+	status = chip->waitfunc(mtd, chip);
+
+	micron_nand_on_die_ecc_setup(chip, false);
+
+	return status & NAND_STATUS_FAIL ? -EIO : 0;
+}
+
+static int
+micron_nand_read_page_raw_on_die_ecc(struct mtd_info *mtd,
+				     struct nand_chip *chip,
+				     uint8_t *buf, int oob_required,
+				     int page)
+{
+	chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
+	nand_read_page_raw(mtd, chip, buf, oob_required, page);
+
+	return 0;
+}
+
+static int
+micron_nand_write_page_raw_on_die_ecc(struct mtd_info *mtd,
+				      struct nand_chip *chip,
+				      const uint8_t *buf, int oob_required,
+				      int page)
+{
+	int status;
+
+	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
+	nand_write_page_raw(mtd, chip, buf, oob_required, page);
+	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+	status = chip->waitfunc(mtd, chip);
+
+	return status & NAND_STATUS_FAIL ? -EIO : 0;
+}
+
+enum {
+	/* The NAND flash doesn't support on-die ECC */
+	MICRON_ON_DIE_UNSUPPORTED,
+
+	/*
+	 * The NAND flash supports on-die ECC and it can be
+	 * enabled/disabled by a set features command.
+	 */
+	MICRON_ON_DIE_SUPPORTED,
+
+	/*
+	 * The NAND flash supports on-die ECC, and it cannot be
+	 * disabled.
+	 */
+	MICRON_ON_DIE_MANDATORY,
+};
+
+/*
+ * Try to detect if the NAND supports on-die ECC. To do this, we enable
+ * the feature, and read back if it has been enabled as expected. We
+ * also check if it can be disabled, because some Micron NANDs do not
+ * allow disabling the on-die ECC and we don't support such NANDs for
+ * now.
+ *
+ * This function also has the side effect of disabling on-die ECC if
+ * it had been left enabled by the firmware/bootloader.
+ */
+static int micron_supports_on_die_ecc(struct nand_chip *chip)
+{
+	u8 feature[ONFI_SUBFEATURE_PARAM_LEN] = { 0, };
+	int ret;
+
+	if (chip->onfi_version == 0)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	if (chip->bits_per_cell != 1)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	ret = micron_nand_on_die_ecc_setup(chip, true);
+	if (ret)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	chip->onfi_get_features(nand_to_mtd(chip), chip,
+				ONFI_FEATURE_ON_DIE_ECC, feature);
+	if ((feature[0] & ONFI_FEATURE_ON_DIE_ECC_EN) == 0)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	ret = micron_nand_on_die_ecc_setup(chip, false);
+	if (ret)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	chip->onfi_get_features(nand_to_mtd(chip), chip,
+				ONFI_FEATURE_ON_DIE_ECC, feature);
+	if (feature[0] & ONFI_FEATURE_ON_DIE_ECC_EN)
+		return MICRON_ON_DIE_MANDATORY;
+
+	/*
+	 * Some Micron NANDs have an on-die ECC of 4/512, some other
+	 * 8/512. We only support the former.
+	 */
+	if (chip->onfi_params.ecc_bits != 4)
+		return MICRON_ON_DIE_UNSUPPORTED;
+
+	return MICRON_ON_DIE_SUPPORTED;
+}
+
 static int micron_nand_init(struct nand_chip *chip)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
+	int ondie;
 	int ret;
 
 	ret = micron_nand_onfi_init(chip);
@@ -78,6 +272,34 @@ static int micron_nand_init(struct nand_chip *chip)
 	if (mtd->writesize == 2048)
 		chip->bbt_options |= NAND_BBT_SCAN2NDPAGE;
 
+	ondie = micron_supports_on_die_ecc(chip);
+
+	if (ondie == MICRON_ON_DIE_MANDATORY) {
+		pr_err("On-die ECC forcefully enabled, not supported\n");
+		return -EINVAL;
+	}
+
+	if (chip->ecc.mode == NAND_ECC_ON_DIE) {
+		if (ondie == MICRON_ON_DIE_UNSUPPORTED) {
+			pr_err("On-die ECC selected but not supported\n");
+			return -EINVAL;
+		}
+
+		chip->ecc.options = NAND_ECC_CUSTOM_PAGE_ACCESS;
+		chip->ecc.bytes = 8;
+		chip->ecc.size = 512;
+		chip->ecc.strength = 4;
+		chip->ecc.algo = NAND_ECC_BCH;
+		chip->ecc.read_page = micron_nand_read_page_on_die_ecc;
+		chip->ecc.write_page = micron_nand_write_page_on_die_ecc;
+		chip->ecc.read_page_raw =
+			micron_nand_read_page_raw_on_die_ecc;
+		chip->ecc.write_page_raw =
+			micron_nand_write_page_raw_on_die_ecc;
+
+		mtd_set_ooblayout(mtd, &micron_nand_on_die_ooblayout_ops);
+	}
+
 	return 0;
 }
 

diff --git a/drivers/mtd/nand/orion_nand.c b/drivers/mtd/nand/orion_nand.c
index f8e463a..209170e 100644
--- a/drivers/mtd/nand/orion_nand.c
+++ b/drivers/mtd/nand/orion_nand.c

@@ -166,7 +166,11 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 		}
 	}
 
-	clk_prepare_enable(info->clk);
+	ret = clk_prepare_enable(info->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to prepare clock!\n");
+		return ret;
+	}
 
 	ret = nand_scan(mtd, 1);
 	if (ret)

diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 649ba82..74dae4b 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c

@@ -1812,6 +1812,8 @@ static int alloc_nand_resource(struct platform_device *pdev)
 		chip->write_buf		= pxa3xx_nand_write_buf;
 		chip->options		|= NAND_NO_SUBPAGE_WRITE;
 		chip->cmdfunc		= nand_cmdfunc;
+		chip->onfi_set_features	= nand_onfi_get_set_features_notsupp;
+		chip->onfi_get_features	= nand_onfi_get_set_features_notsupp;
 	}
 
 	nand_hw_control_init(chip->controller);

diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c
index 57d483a..88af714 100644
--- a/drivers/mtd/nand/qcom_nandc.c
+++ b/drivers/mtd/nand/qcom_nandc.c

@@ -2008,6 +2008,8 @@ static int qcom_nand_host_init(struct qcom_nand_controller *nandc,
 	chip->read_byte		= qcom_nandc_read_byte;
 	chip->read_buf		= qcom_nandc_read_buf;
 	chip->write_buf		= qcom_nandc_write_buf;
+	chip->onfi_set_features	= nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features	= nand_onfi_get_set_features_notsupp;
 
 	/*
 	 * the bad block marker is readable only when we read the last codeword

diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index f0b030d..9e0c849 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c

@@ -812,9 +812,8 @@ static int s3c2410_nand_add_partition(struct s3c2410_nand_info *info,
 	return -ENODEV;
 }
 
-static int s3c2410_nand_setup_data_interface(struct mtd_info *mtd,
-					const struct nand_data_interface *conf,
-					bool check_only)
+static int s3c2410_nand_setup_data_interface(struct mtd_info *mtd, int csline,
+					const struct nand_data_interface *conf)
 {
 	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
 	struct s3c2410_platform_nand *pdata = info->platform;

diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index 442ce61..891ac7b 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c

@@ -1183,6 +1183,8 @@ static int flctl_probe(struct platform_device *pdev)
 	nand->read_buf = flctl_read_buf;
 	nand->select_chip = flctl_select_chip;
 	nand->cmdfunc = flctl_cmdfunc;
+	nand->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	nand->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	if (pdata->flcmncr_val & SEL_16BIT)
 		nand->options |= NAND_BUSWIDTH_16;

diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c
index 118a26f..d0b6f8f 100644
--- a/drivers/mtd/nand/sunxi_nand.c
+++ b/drivers/mtd/nand/sunxi_nand.c

@@ -1301,7 +1301,6 @@ static int sunxi_nfc_hw_ecc_read_subpage(struct mtd_info *mtd,
 
 	sunxi_nfc_hw_ecc_enable(mtd);
 
-	chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page);
 	for (i = data_offs / ecc->size;
 	     i < DIV_ROUND_UP(data_offs + readlen, ecc->size); i++) {
 		int data_off = i * ecc->size;
@@ -1592,9 +1591,8 @@ static int _sunxi_nand_lookup_timing(const s32 *lut, int lut_size, u32 duration,
 #define sunxi_nand_lookup_timing(l, p, c) \
 			_sunxi_nand_lookup_timing(l, ARRAY_SIZE(l), p, c)
 
-static int sunxi_nfc_setup_data_interface(struct mtd_info *mtd,
-					const struct nand_data_interface *conf,
-					bool check_only)
+static int sunxi_nfc_setup_data_interface(struct mtd_info *mtd, int csline,
+					const struct nand_data_interface *conf)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct sunxi_nand_chip *chip = to_sunxi_nand(nand);
@@ -1707,7 +1705,7 @@ static int sunxi_nfc_setup_data_interface(struct mtd_info *mtd,
 		return tRHW;
 	}
 
-	if (check_only)
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
 		return 0;
 
 	/*
@@ -1922,7 +1920,6 @@ static int sunxi_nand_hw_ecc_ctrl_init(struct mtd_info *mtd,
 	ecc->write_subpage = sunxi_nfc_hw_ecc_write_subpage;
 	ecc->read_oob_raw = nand_read_oob_std;
 	ecc->write_oob_raw = nand_write_oob_std;
-	ecc->read_subpage = sunxi_nfc_hw_ecc_read_subpage;
 
 	return 0;
 }

diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c
index 49b286c..9d40b79 100644
--- a/drivers/mtd/nand/tango_nand.c
+++ b/drivers/mtd/nand/tango_nand.c

@@ -303,7 +303,7 @@ static int tango_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 			    const u8 *buf, int oob_required, int page)
 {
 	struct tango_nfc *nfc = to_tango_nfc(chip->controller);
-	int err, len = mtd->writesize;
+	int err, status, len = mtd->writesize;
 
 	/* Calling tango_write_oob() would send PAGEPROG twice */
 	if (oob_required)
@@ -314,6 +314,10 @@ static int tango_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	if (err)
 		return err;
 
+	status = chip->waitfunc(mtd, chip);
+	if (status & NAND_STATUS_FAIL)
+		return -EIO;
+
 	return 0;
 }
 
@@ -340,7 +344,7 @@ static void aux_write(struct nand_chip *chip, const u8 **buf, int len, int *pos)
 
 	if (!*buf) {
 		/* skip over "len" bytes */
-		chip->cmdfunc(mtd, NAND_CMD_SEQIN, *pos, -1);
+		chip->cmdfunc(mtd, NAND_CMD_RNDIN, *pos, -1);
 	} else {
 		tango_write_buf(mtd, *buf, len);
 		*buf += len;
@@ -431,9 +435,16 @@ static int tango_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 static int tango_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 				const u8 *buf, int oob_required, int page)
 {
+	int status;
+
 	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0, page);
 	raw_write(chip, buf, chip->oob_poi);
 	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+
+	status = chip->waitfunc(mtd, chip);
+	if (status & NAND_STATUS_FAIL)
+		return -EIO;
+
 	return 0;
 }
 
@@ -484,9 +495,8 @@ static u32 to_ticks(int kHz, int ps)
 	return DIV_ROUND_UP_ULL((u64)kHz * ps, NSEC_PER_SEC);
 }
 
-static int tango_set_timings(struct mtd_info *mtd,
-			     const struct nand_data_interface *conf,
-			     bool check_only)
+static int tango_set_timings(struct mtd_info *mtd, int csline,
+			     const struct nand_data_interface *conf)
 {
 	const struct nand_sdr_timings *sdr = nand_get_sdr_timings(conf);
 	struct nand_chip *chip = mtd_to_nand(mtd);
@@ -498,7 +508,7 @@ static int tango_set_timings(struct mtd_info *mtd,
 	if (IS_ERR(sdr))
 		return PTR_ERR(sdr);
 
-	if (check_only)
+	if (csline == NAND_DATA_IFACE_CHECK_ONLY)
 		return 0;
 
 	Trdy = to_ticks(kHz, sdr->tCEA_max - sdr->tREA_max);

diff --git a/drivers/mtd/nand/vf610_nfc.c b/drivers/mtd/nand/vf610_nfc.c
index 3ea4bb1..744ab10 100644
--- a/drivers/mtd/nand/vf610_nfc.c
+++ b/drivers/mtd/nand/vf610_nfc.c

@@ -703,6 +703,8 @@ static int vf610_nfc_probe(struct platform_device *pdev)
 	chip->read_buf = vf610_nfc_read_buf;
 	chip->write_buf = vf610_nfc_write_buf;
 	chip->select_chip = vf610_nfc_select_chip;
+	chip->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	chip->options |= NAND_NO_SUBPAGE_WRITE;
 

diff --git a/drivers/mtd/parsers/Kconfig b/drivers/mtd/parsers/Kconfig
new file mode 100644
index 0000000..d206b3c
--- /dev/null
+++ b/drivers/mtd/parsers/Kconfig

@@ -0,0 +1,8 @@
+config MTD_PARSER_TRX
+	tristate "Parser for TRX format partitions"
+	depends on MTD && (BCM47XX || ARCH_BCM_5301X || COMPILE_TEST)
+	help
+	  TRX is a firmware format used by Broadcom on their devices. It
+	  may contain up to 3/4 partitions (depending on the version).
+	  This driver will parse TRX header and report at least two partitions:
+	  kernel and rootfs.

diff --git a/drivers/mtd/parsers/Makefile b/drivers/mtd/parsers/Makefile
new file mode 100644
index 0000000..4d9024e
--- /dev/null
+++ b/drivers/mtd/parsers/Makefile

@@ -0,0 +1 @@
+obj-$(CONFIG_MTD_PARSER_TRX)		+= parser_trx.o

diff --git a/drivers/mtd/parsers/parser_trx.c b/drivers/mtd/parsers/parser_trx.c
new file mode 100644
index 0000000..df360a7
--- /dev/null
+++ b/drivers/mtd/parsers/parser_trx.c

@@ -0,0 +1,126 @@
+/*
+ * Parser for TRX format partitions
+ *
+ * Copyright (C) 2012 - 2017 Rafał Miłecki <rafal@milecki.pl>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+
+#define TRX_PARSER_MAX_PARTS		4
+
+/* Magics */
+#define TRX_MAGIC			0x30524448
+#define UBI_EC_MAGIC			0x23494255	/* UBI# */
+
+struct trx_header {
+	uint32_t magic;
+	uint32_t length;
+	uint32_t crc32;
+	uint16_t flags;
+	uint16_t version;
+	uint32_t offset[3];
+} __packed;
+
+static const char *parser_trx_data_part_name(struct mtd_info *master,
+					     size_t offset)
+{
+	uint32_t buf;
+	size_t bytes_read;
+	int err;
+
+	err  = mtd_read(master, offset, sizeof(buf), &bytes_read,
+			(uint8_t *)&buf);
+	if (err && !mtd_is_bitflip(err)) {
+		pr_err("mtd_read error while parsing (offset: 0x%zX): %d\n",
+			offset, err);
+		goto out_default;
+	}
+
+	if (buf == UBI_EC_MAGIC)
+		return "ubi";
+
+out_default:
+	return "rootfs";
+}
+
+static int parser_trx_parse(struct mtd_info *mtd,
+			    const struct mtd_partition **pparts,
+			    struct mtd_part_parser_data *data)
+{
+	struct mtd_partition *parts;
+	struct mtd_partition *part;
+	struct trx_header trx;
+	size_t bytes_read;
+	uint8_t curr_part = 0, i = 0;
+	int err;
+
+	parts = kcalloc(TRX_PARSER_MAX_PARTS, sizeof(struct mtd_partition),
+			GFP_KERNEL);
+	if (!parts)
+		return -ENOMEM;
+
+	err = mtd_read(mtd, 0, sizeof(trx), &bytes_read, (uint8_t *)&trx);
+	if (err && !mtd_is_bitflip(err)) {
+		pr_err("MTD reading error: %d\n", err);
+		kfree(parts);
+		return err;
+	}
+
+	if (trx.magic != TRX_MAGIC) {
+		kfree(parts);
+		return -ENOENT;
+	}
+
+	/* We have an LZMA loader if there is an address in offset[2] */
+	if (trx.offset[2]) {
+		part = &parts[curr_part++];
+		part->name = "loader";
+		part->offset = trx.offset[i];
+		i++;
+	}
+
+	if (trx.offset[i]) {
+		part = &parts[curr_part++];
+		part->name = "linux";
+		part->offset = trx.offset[i];
+		i++;
+	}
+
+	if (trx.offset[i]) {
+		part = &parts[curr_part++];
+		part->name = parser_trx_data_part_name(mtd, trx.offset[i]);
+		part->offset = trx.offset[i];
+		i++;
+	}
+
+	/*
+	 * Assume that every partition ends at the beginning of the one it is
+	 * followed by; the last one extends to the end of the device.
+	 */
+	for (i = 0; i < curr_part; i++) {
+		u64 next_part_offset = (i < curr_part - 1) ?
+				       parts[i + 1].offset : mtd->size;
+
+		parts[i].size = next_part_offset - parts[i].offset;
+	}
+
+	*pparts = parts;
+	return curr_part;
+}
+
+static struct mtd_part_parser mtd_parser_trx = {
+	.parse_fn = parser_trx_parse,
+	.name = "trx",
+};
+module_mtd_part_parser(mtd_parser_trx);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Parser for TRX format partitions");

diff --git a/drivers/mtd/spi-nor/Kconfig b/drivers/mtd/spi-nor/Kconfig
index bfdfb1e..293c8a4 100644
--- a/drivers/mtd/spi-nor/Kconfig
+++ b/drivers/mtd/spi-nor/Kconfig

@@ -108,7 +108,7 @@
 
 config SPI_STM32_QUADSPI
 	tristate "STM32 Quad SPI controller"
-	depends on ARCH_STM32
+	depends on ARCH_STM32 || COMPILE_TEST
 	help
 	  This enables support for the STM32 Quad SPI controller.
 	  We only connect the NOR to this controller.

diff --git a/drivers/mtd/spi-nor/aspeed-smc.c b/drivers/mtd/spi-nor/aspeed-smc.c
index 56051d3..0106357 100644
--- a/drivers/mtd/spi-nor/aspeed-smc.c
+++ b/drivers/mtd/spi-nor/aspeed-smc.c

@@ -19,6 +19,7 @@
 #include <linux/mtd/spi-nor.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
+#include <linux/sizes.h>
 #include <linux/sysfs.h>
 
 #define DEVICE_NAME	"aspeed-smc"
@@ -97,6 +98,7 @@ struct aspeed_smc_chip {
 	struct aspeed_smc_controller *controller;
 	void __iomem *ctl;			/* control register */
 	void __iomem *ahb_base;			/* base of chip window */
+	u32 ahb_window_size;			/* chip mapping window size */
 	u32 ctl_val[smc_max];			/* control settings */
 	enum aspeed_smc_flash_type type;	/* what type of flash */
 	struct spi_nor nor;
@@ -109,6 +111,7 @@ struct aspeed_smc_controller {
 	const struct aspeed_smc_info *info;	/* type info of controller */
 	void __iomem *regs;			/* controller registers */
 	void __iomem *ahb_base;			/* per-chip windows resource */
+	u32 ahb_window_size;			/* full mapping window size */
 
 	struct aspeed_smc_chip *chips[0];	/* pointers to attached chips */
 };
@@ -180,8 +183,7 @@ struct aspeed_smc_controller {
 
 #define CONTROL_KEEP_MASK						\
 	(CONTROL_AAF_MODE | CONTROL_CE_INACTIVE_MASK | CONTROL_CLK_DIV4 | \
-	 CONTROL_IO_DUMMY_MASK | CONTROL_CLOCK_FREQ_SEL_MASK |		\
-	 CONTROL_LSB_FIRST | CONTROL_CLOCK_MODE_3)
+	 CONTROL_CLOCK_FREQ_SEL_MASK | CONTROL_LSB_FIRST | CONTROL_CLOCK_MODE_3)
 
 /*
  * The Segment Register uses a 8MB unit to encode the start address
@@ -194,6 +196,10 @@ struct aspeed_smc_controller {
 #define SEGMENT_ADDR_REG0		0x30
 #define SEGMENT_ADDR_START(_r)		((((_r) >> 16) & 0xFF) << 23)
 #define SEGMENT_ADDR_END(_r)		((((_r) >> 24) & 0xFF) << 23)
+#define SEGMENT_ADDR_VALUE(start, end)					\
+	(((((start) >> 23) & 0xFF) << 16) | ((((end) >> 23) & 0xFF) << 24))
+#define SEGMENT_ADDR_REG(controller, cs)	\
+	((controller)->regs + SEGMENT_ADDR_REG0 + (cs) * 4)
 
 /*
  * In user mode all data bytes read or written to the chip decode address
@@ -439,8 +445,7 @@ static void __iomem *aspeed_smc_chip_base(struct aspeed_smc_chip *chip,
 	u32 reg;
 
 	if (controller->info->nce > 1) {
-		reg = readl(controller->regs + SEGMENT_ADDR_REG0 +
-			    chip->cs * 4);
+		reg = readl(SEGMENT_ADDR_REG(controller, chip->cs));
 
 		if (SEGMENT_ADDR_START(reg) >= SEGMENT_ADDR_END(reg))
 			return NULL;
@@ -451,6 +456,146 @@ static void __iomem *aspeed_smc_chip_base(struct aspeed_smc_chip *chip,
 	return controller->ahb_base + offset;
 }
 
+static u32 aspeed_smc_ahb_base_phy(struct aspeed_smc_controller *controller)
+{
+	u32 seg0_val = readl(SEGMENT_ADDR_REG(controller, 0)); /* CE0 segment */
+
+	return SEGMENT_ADDR_START(seg0_val); /* start address encoded for CE0 */
+}
+
+static u32 chip_set_segment(struct aspeed_smc_chip *chip, u32 cs, u32 start,
+			    u32 size)	/* size 0: keep the current segment end */
+{
+	struct aspeed_smc_controller *controller = chip->controller;
+	void __iomem *seg_reg;
+	u32 seg_oldval, seg_newval, ahb_base_phy, end;
+
+	ahb_base_phy = aspeed_smc_ahb_base_phy(controller);
+
+	seg_reg = SEGMENT_ADDR_REG(controller, cs);
+	seg_oldval = readl(seg_reg);	/* kept so it can be restored below */
+
+	/*
+	 * If the chip size is not specified, use the default segment
+	 * size, but take into account the possible overlap with the
+	 * previous segment.
+	 */
+	if (!size)
+		size = SEGMENT_ADDR_END(seg_oldval) - start;
+
+	/*
+	 * The segment cannot exceed the maximum window size of the
+	 * controller.
+	 */
+	if (start + size > ahb_base_phy + controller->ahb_window_size) {
+		size = ahb_base_phy + controller->ahb_window_size - start;
+		dev_warn(chip->nor.dev, "CE%d window resized to %dMB",
+			 cs, size >> 20);
+	}
+
+	end = start + size;
+	seg_newval = SEGMENT_ADDR_VALUE(start, end);
+	writel(seg_newval, seg_reg);
+
+	/*
+	 * Restore default value if something goes wrong. The chip
+	 * might have set some bogus value and we would lose access
+	 * to the chip.
+	 */
+	if (seg_newval != readl(seg_reg)) {	/* read back to verify */
+		dev_err(chip->nor.dev, "CE%d window invalid", cs);
+		writel(seg_oldval, seg_reg);
+		start = SEGMENT_ADDR_START(seg_oldval);
+		end = SEGMENT_ADDR_END(seg_oldval);
+		size = end - start;
+	}
+
+	dev_info(chip->nor.dev, "CE%d window [ 0x%.8x - 0x%.8x ] %dMB",
+		 cs, start, end, size >> 20);
+
+	return size;	/* size of the window left programmed for this CE */
+}
+
+/*
+ * The segment register defines the mapping window on the AHB bus and
+ * it needs to be configured depending on the chip size. The segment
+ * register of the following CE also needs to be tuned in order to
+ * provide a contiguous window across multiple chips.
+ *
+ * Expected to be called in increasing CE order. Returns the window size.
+ */
+static u32 aspeed_smc_chip_set_segment(struct aspeed_smc_chip *chip)
+{
+	struct aspeed_smc_controller *controller = chip->controller;
+	u32 ahb_base_phy, start;
+	u32 size = chip->nor.mtd.size; /* NOTE(review): u32; confirm size fits */
+
+	/*
+	 * Each controller has a chip size limit for direct memory
+	 * access
+	 */
+	if (size > controller->info->maxsize)
+		size = controller->info->maxsize;
+
+	/*
+	 * The AST2400 SPI controller only handles one chip and does
+	 * not have segment registers. Let's use the chip size for the
+	 * AHB window.
+	 */
+	if (controller->info == &spi_2400_info)
+		goto out;	/* skips every segment register access below */
+
+	/*
+	 * The AST2500 SPI controller has a HW bug when the CE0 chip
+	 * size reaches 128MB. Enforce a size limit of 120MB to
+	 * prevent the controller from using bogus settings in the
+	 * segment register.
+	 */
+	if (chip->cs == 0 && controller->info == &spi_2500_info &&
+	    size == SZ_128M) {
+		size = 120 << 20;	/* 120MB */
+		dev_info(chip->nor.dev,
+			 "CE%d window resized to %dMB (AST2500 HW quirk)",
+			 chip->cs, size >> 20);
+	}
+
+	ahb_base_phy = aspeed_smc_ahb_base_phy(controller);
+
+	/*
+	 * As a start address for the current segment, use the default
+	 * start address if we are handling CE0 or use the previous
+	 * segment ending address
+	 */
+	if (chip->cs) {
+		u32 prev = readl(SEGMENT_ADDR_REG(controller, chip->cs - 1));
+
+		start = SEGMENT_ADDR_END(prev);
+	} else {
+		start = ahb_base_phy;
+	}
+
+	size = chip_set_segment(chip, chip->cs, start, size);
+
+	/* Update chip base address on the AHB bus */
+	chip->ahb_base = controller->ahb_base + (start - ahb_base_phy);
+
+	/*
+	 * Now, make sure the next segment does not overlap with the
+	 * current one we just configured, even if there is no
+	 * available chip. That could break access in Command Mode.
+	 */
+	if (chip->cs < controller->info->nce - 1)
+		chip_set_segment(chip, chip->cs + 1, start + size, 0);
+
+out:
+	if (size < chip->nor.mtd.size)	/* window does not cover the chip */
+		dev_warn(chip->nor.dev,
+			 "CE%d window too small for chip %dMB",
+			 chip->cs, (u32)chip->nor.mtd.size >> 20);
+
+	return size;
+}
+
 static void aspeed_smc_chip_enable_write(struct aspeed_smc_chip *chip)
 {
 	struct aspeed_smc_controller *controller = chip->controller;
@@ -524,7 +669,7 @@ static int aspeed_smc_chip_setup_init(struct aspeed_smc_chip *chip,
 	 */
 	chip->ahb_base = aspeed_smc_chip_base(chip, res);
 	if (!chip->ahb_base) {
-		dev_warn(chip->nor.dev, "CE segment window closed.\n");
+		dev_warn(chip->nor.dev, "CE%d window closed", chip->cs);
 		return -EINVAL;
 	}
 
@@ -571,6 +716,9 @@ static int aspeed_smc_chip_setup_finish(struct aspeed_smc_chip *chip)
 	if (chip->nor.addr_width == 4 && info->set_4b)
 		info->set_4b(chip);
 
+	/* This is for direct AHB access when using Command Mode. */
+	chip->ahb_window_size = aspeed_smc_chip_set_segment(chip);
+
 	/*
 	 * base mode has not been optimized yet. use it for writes.
 	 */
@@ -585,14 +733,12 @@ static int aspeed_smc_chip_setup_finish(struct aspeed_smc_chip *chip)
 	 * TODO: Adjust clocks if fast read is supported and interpret
 	 * SPI-NOR flags to adjust controller settings.
 	 */
-	switch (chip->nor.flash_read) {
-	case SPI_NOR_NORMAL:
-		cmd = CONTROL_COMMAND_MODE_NORMAL;
-		break;
-	case SPI_NOR_FAST:
-		cmd = CONTROL_COMMAND_MODE_FREAD;
-		break;
-	default:
+	if (chip->nor.read_proto == SNOR_PROTO_1_1_1) {
+		if (chip->nor.read_dummy == 0)
+			cmd = CONTROL_COMMAND_MODE_NORMAL;
+		else
+			cmd = CONTROL_COMMAND_MODE_FREAD;
+	} else {
 		dev_err(chip->nor.dev, "unsupported SPI read mode\n");
 		return -EINVAL;
 	}
@@ -608,6 +754,11 @@ static int aspeed_smc_chip_setup_finish(struct aspeed_smc_chip *chip)
 static int aspeed_smc_setup_flash(struct aspeed_smc_controller *controller,
 				  struct device_node *np, struct resource *r)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_PP,
+	};
 	const struct aspeed_smc_info *info = controller->info;
 	struct device *dev = controller->dev;
 	struct device_node *child;
@@ -671,11 +822,11 @@ static int aspeed_smc_setup_flash(struct aspeed_smc_controller *controller,
 			break;
 
 		/*
-		 * TODO: Add support for SPI_NOR_QUAD and SPI_NOR_DUAL
+		 * TODO: Add support for Dual and Quad SPI protocols
 		 * attach when board support is present as determined
 		 * by of property.
 		 */
-		ret = spi_nor_scan(nor, NULL, SPI_NOR_NORMAL);
+		ret = spi_nor_scan(nor, NULL, &hwcaps);
 		if (ret)
 			break;
 
@@ -731,6 +882,8 @@ static int aspeed_smc_probe(struct platform_device *pdev)
 	if (IS_ERR(controller->ahb_base))
 		return PTR_ERR(controller->ahb_base);
 
+	controller->ahb_window_size = resource_size(res);
+
 	ret = aspeed_smc_setup_flash(controller, np, res);
 	if (ret)
 		dev_err(dev, "Aspeed SMC probe failed %d\n", ret);

diff --git a/drivers/mtd/spi-nor/atmel-quadspi.c b/drivers/mtd/spi-nor/atmel-quadspi.c
index 47937d9..ba76fa8 100644
--- a/drivers/mtd/spi-nor/atmel-quadspi.c
+++ b/drivers/mtd/spi-nor/atmel-quadspi.c

@@ -275,14 +275,48 @@ static void atmel_qspi_debug_command(struct atmel_qspi *aq,
 
 static int atmel_qspi_run_command(struct atmel_qspi *aq,
 				  const struct atmel_qspi_command *cmd,
-				  u32 ifr_tfrtyp, u32 ifr_width)
+				  u32 ifr_tfrtyp, enum spi_nor_protocol proto)
 {
 	u32 iar, icr, ifr, sr;
 	int err = 0;
 
 	iar = 0;
 	icr = 0;
-	ifr = ifr_tfrtyp | ifr_width;
+	ifr = ifr_tfrtyp;
+
+	/* Set the SPI protocol */
+	switch (proto) {
+	case SNOR_PROTO_1_1_1:
+		ifr |= QSPI_IFR_WIDTH_SINGLE_BIT_SPI;
+		break;
+
+	case SNOR_PROTO_1_1_2:
+		ifr |= QSPI_IFR_WIDTH_DUAL_OUTPUT;
+		break;
+
+	case SNOR_PROTO_1_1_4:
+		ifr |= QSPI_IFR_WIDTH_QUAD_OUTPUT;
+		break;
+
+	case SNOR_PROTO_1_2_2:
+		ifr |= QSPI_IFR_WIDTH_DUAL_IO;
+		break;
+
+	case SNOR_PROTO_1_4_4:
+		ifr |= QSPI_IFR_WIDTH_QUAD_IO;
+		break;
+
+	case SNOR_PROTO_2_2_2:
+		ifr |= QSPI_IFR_WIDTH_DUAL_CMD;
+		break;
+
+	case SNOR_PROTO_4_4_4:
+		ifr |= QSPI_IFR_WIDTH_QUAD_CMD;
+		break;
+
+	default:
+		return -EINVAL;
+	}
 
 	/* Compute instruction parameters */
 	if (cmd->enable.bits.instruction) {
@@ -434,7 +468,7 @@ static int atmel_qspi_read_reg(struct spi_nor *nor, u8 opcode,
 	cmd.rx_buf = buf;
 	cmd.buf_len = len;
 	return atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_READ,
-				      QSPI_IFR_WIDTH_SINGLE_BIT_SPI);
+				      nor->reg_proto);
 }
 
 static int atmel_qspi_write_reg(struct spi_nor *nor, u8 opcode,
@@ -450,7 +484,7 @@ static int atmel_qspi_write_reg(struct spi_nor *nor, u8 opcode,
 	cmd.tx_buf = buf;
 	cmd.buf_len = len;
 	return atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_WRITE,
-				      QSPI_IFR_WIDTH_SINGLE_BIT_SPI);
+				      nor->reg_proto);
 }
 
 static ssize_t atmel_qspi_write(struct spi_nor *nor, loff_t to, size_t len,
@@ -469,7 +503,7 @@ static ssize_t atmel_qspi_write(struct spi_nor *nor, loff_t to, size_t len,
 	cmd.tx_buf = write_buf;
 	cmd.buf_len = len;
 	ret = atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_WRITE_MEM,
-				     QSPI_IFR_WIDTH_SINGLE_BIT_SPI);
+				     nor->write_proto);
 	return (ret < 0) ? ret : len;
 }
 
@@ -484,7 +518,7 @@ static int atmel_qspi_erase(struct spi_nor *nor, loff_t offs)
 	cmd.instruction = nor->erase_opcode;
 	cmd.address = (u32)offs;
 	return atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_WRITE,
-				      QSPI_IFR_WIDTH_SINGLE_BIT_SPI);
+				      nor->reg_proto);
 }
 
 static ssize_t atmel_qspi_read(struct spi_nor *nor, loff_t from, size_t len,
@@ -493,27 +527,8 @@ static ssize_t atmel_qspi_read(struct spi_nor *nor, loff_t from, size_t len,
 	struct atmel_qspi *aq = nor->priv;
 	struct atmel_qspi_command cmd;
 	u8 num_mode_cycles, num_dummy_cycles;
-	u32 ifr_width;
 	ssize_t ret;
 
-	switch (nor->flash_read) {
-	case SPI_NOR_NORMAL:
-	case SPI_NOR_FAST:
-		ifr_width = QSPI_IFR_WIDTH_SINGLE_BIT_SPI;
-		break;
-
-	case SPI_NOR_DUAL:
-		ifr_width = QSPI_IFR_WIDTH_DUAL_OUTPUT;
-		break;
-
-	case SPI_NOR_QUAD:
-		ifr_width = QSPI_IFR_WIDTH_QUAD_OUTPUT;
-		break;
-
-	default:
-		return -EINVAL;
-	}
-
 	if (nor->read_dummy >= 2) {
 		num_mode_cycles = 2;
 		num_dummy_cycles = nor->read_dummy - 2;
@@ -536,7 +551,7 @@ static ssize_t atmel_qspi_read(struct spi_nor *nor, loff_t from, size_t len,
 	cmd.rx_buf = read_buf;
 	cmd.buf_len = len;
 	ret = atmel_qspi_run_command(aq, &cmd, QSPI_IFR_TFRTYP_TRSFR_READ_MEM,
-				     ifr_width);
+				     nor->read_proto);
 	return (ret < 0) ? ret : len;
 }
 
@@ -590,6 +605,20 @@ static irqreturn_t atmel_qspi_interrupt(int irq, void *dev_id)
 
 static int atmel_qspi_probe(struct platform_device *pdev)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_READ_1_1_2 |
+			SNOR_HWCAPS_READ_1_2_2 |
+			SNOR_HWCAPS_READ_2_2_2 |
+			SNOR_HWCAPS_READ_1_1_4 |
+			SNOR_HWCAPS_READ_1_4_4 |
+			SNOR_HWCAPS_READ_4_4_4 |
+			SNOR_HWCAPS_PP |
+			SNOR_HWCAPS_PP_1_1_4 |
+			SNOR_HWCAPS_PP_1_4_4 |
+			SNOR_HWCAPS_PP_4_4_4,
+	};
 	struct device_node *child, *np = pdev->dev.of_node;
 	struct atmel_qspi *aq;
 	struct resource *res;
@@ -679,7 +708,7 @@ static int atmel_qspi_probe(struct platform_device *pdev)
 	if (err)
 		goto disable_clk;
 
-	err = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
+	err = spi_nor_scan(nor, NULL, &hwcaps);
 	if (err)
 		goto disable_clk;
 

diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c
index 9f8102d..53c7d8e 100644
--- a/drivers/mtd/spi-nor/cadence-quadspi.c
+++ b/drivers/mtd/spi-nor/cadence-quadspi.c

@@ -855,15 +855,14 @@ static int cqspi_set_protocol(struct spi_nor *nor, const int read)
 	f_pdata->data_width = CQSPI_INST_TYPE_SINGLE;
 
 	if (read) {
-		switch (nor->flash_read) {
-		case SPI_NOR_NORMAL:
-		case SPI_NOR_FAST:
+		switch (nor->read_proto) {
+		case SNOR_PROTO_1_1_1:
 			f_pdata->data_width = CQSPI_INST_TYPE_SINGLE;
 			break;
-		case SPI_NOR_DUAL:
+		case SNOR_PROTO_1_1_2:
 			f_pdata->data_width = CQSPI_INST_TYPE_DUAL;
 			break;
-		case SPI_NOR_QUAD:
+		case SNOR_PROTO_1_1_4:
 			f_pdata->data_width = CQSPI_INST_TYPE_QUAD;
 			break;
 		default:
@@ -1069,6 +1068,13 @@ static void cqspi_controller_init(struct cqspi_st *cqspi)
 
 static int cqspi_setup_flash(struct cqspi_st *cqspi, struct device_node *np)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_READ_1_1_2 |
+			SNOR_HWCAPS_READ_1_1_4 |
+			SNOR_HWCAPS_PP,
+	};
 	struct platform_device *pdev = cqspi->pdev;
 	struct device *dev = &pdev->dev;
 	struct cqspi_flash_pdata *f_pdata;
@@ -1123,7 +1129,7 @@ static int cqspi_setup_flash(struct cqspi_st *cqspi, struct device_node *np)
 			goto err;
 		}
 
-		ret = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
+		ret = spi_nor_scan(nor, NULL, &hwcaps);
 		if (ret)
 			goto err;
 
@@ -1277,7 +1283,7 @@ static const struct dev_pm_ops cqspi__dev_pm_ops = {
 #define CQSPI_DEV_PM_OPS	NULL
 #endif
 
-static struct of_device_id const cqspi_dt_ids[] = {
+static const struct of_device_id cqspi_dt_ids[] = {
 	{.compatible = "cdns,qspi-nor",},
 	{ /* end of table */ }
 };

diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
index 1476135..f17d224 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c

@@ -957,6 +957,10 @@ static void fsl_qspi_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
 
 static int fsl_qspi_probe(struct platform_device *pdev)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ_1_1_4 |
+			SNOR_HWCAPS_PP,
+	};
 	struct device_node *np = pdev->dev.of_node;
 	struct device *dev = &pdev->dev;
 	struct fsl_qspi *q;
@@ -1065,7 +1069,7 @@ static int fsl_qspi_probe(struct platform_device *pdev)
 		/* set the chip address for READID */
 		fsl_qspi_set_base_addr(q, nor);
 
-		ret = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
+		ret = spi_nor_scan(nor, NULL, &hwcaps);
 		if (ret)
 			goto mutex_failed;
 

diff --git a/drivers/mtd/spi-nor/hisi-sfc.c b/drivers/mtd/spi-nor/hisi-sfc.c
index a286350..d110683 100644
--- a/drivers/mtd/spi-nor/hisi-sfc.c
+++ b/drivers/mtd/spi-nor/hisi-sfc.c

@@ -120,19 +120,24 @@ static inline int wait_op_finish(struct hifmc_host *host)
 		(reg & FMC_INT_OP_DONE), 0, FMC_WAIT_TIMEOUT);
 }
 
-static int get_if_type(enum read_mode flash_read)
+static int get_if_type(enum spi_nor_protocol proto)
 {
 	enum hifmc_iftype if_type;
 
-	switch (flash_read) {
-	case SPI_NOR_DUAL:
+	switch (proto) {
+	case SNOR_PROTO_1_1_2:
 		if_type = IF_TYPE_DUAL;
 		break;
-	case SPI_NOR_QUAD:
+	case SNOR_PROTO_1_2_2:
+		if_type = IF_TYPE_DIO;
+		break;
+	case SNOR_PROTO_1_1_4:
 		if_type = IF_TYPE_QUAD;
 		break;
-	case SPI_NOR_NORMAL:
-	case SPI_NOR_FAST:
+	case SNOR_PROTO_1_4_4:
+		if_type = IF_TYPE_QIO;
+		break;
+	case SNOR_PROTO_1_1_1:
 	default:
 		if_type = IF_TYPE_STD;
 		break;
@@ -253,7 +258,10 @@ static int hisi_spi_nor_dma_transfer(struct spi_nor *nor, loff_t start_off,
 	writel(FMC_DMA_LEN_SET(len), host->regbase + FMC_DMA_LEN);
 
 	reg = OP_CFG_FM_CS(priv->chipselect);
-	if_type = get_if_type(nor->flash_read);
+	if (op_type == FMC_OP_READ)
+		if_type = get_if_type(nor->read_proto);
+	else
+		if_type = get_if_type(nor->write_proto);
 	reg |= OP_CFG_MEM_IF_TYPE(if_type);
 	if (op_type == FMC_OP_READ)
 		reg |= OP_CFG_DUMMY_NUM(nor->read_dummy >> 3);
@@ -321,6 +329,13 @@ static ssize_t hisi_spi_nor_write(struct spi_nor *nor, loff_t to,
 static int hisi_spi_nor_register(struct device_node *np,
 				struct hifmc_host *host)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_READ_1_1_2 |
+			SNOR_HWCAPS_READ_1_1_4 |
+			SNOR_HWCAPS_PP,
+	};
 	struct device *dev = host->dev;
 	struct spi_nor *nor;
 	struct hifmc_priv *priv;
@@ -362,7 +377,7 @@ static int hisi_spi_nor_register(struct device_node *np,
 	nor->read = hisi_spi_nor_read;
 	nor->write = hisi_spi_nor_write;
 	nor->erase = NULL;
-	ret = spi_nor_scan(nor, NULL, SPI_NOR_QUAD);
+	ret = spi_nor_scan(nor, NULL, &hwcaps);
 	if (ret)
 		return ret;
 

diff --git a/drivers/mtd/spi-nor/intel-spi.c b/drivers/mtd/spi-nor/intel-spi.c
index 986a3d0..8a596bf 100644
--- a/drivers/mtd/spi-nor/intel-spi.c
+++ b/drivers/mtd/spi-nor/intel-spi.c

@@ -715,6 +715,11 @@ static void intel_spi_fill_partition(struct intel_spi *ispi,
 struct intel_spi *intel_spi_probe(struct device *dev,
 	struct resource *mem, const struct intel_spi_boardinfo *info)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_PP,
+	};
 	struct mtd_partition part;
 	struct intel_spi *ispi;
 	int ret;
@@ -746,7 +751,7 @@ struct intel_spi *intel_spi_probe(struct device *dev,
 	ispi->nor.write = intel_spi_write;
 	ispi->nor.erase = intel_spi_erase;
 
-	ret = spi_nor_scan(&ispi->nor, NULL, SPI_NOR_NORMAL);
+	ret = spi_nor_scan(&ispi->nor, NULL, &hwcaps);
 	if (ret) {
 		dev_info(dev, "failed to locate the chip\n");
 		return ERR_PTR(ret);

diff --git a/drivers/mtd/spi-nor/mtk-quadspi.c b/drivers/mtd/spi-nor/mtk-quadspi.c
index b637770..8a20ec4 100644
--- a/drivers/mtd/spi-nor/mtk-quadspi.c
+++ b/drivers/mtd/spi-nor/mtk-quadspi.c

@@ -123,20 +123,20 @@ static void mt8173_nor_set_read_mode(struct mt8173_nor *mt8173_nor)
 {
 	struct spi_nor *nor = &mt8173_nor->nor;
 
-	switch (nor->flash_read) {
-	case SPI_NOR_FAST:
+	switch (nor->read_proto) {
+	case SNOR_PROTO_1_1_1:
 		writeb(nor->read_opcode, mt8173_nor->base +
 		       MTK_NOR_PRGDATA3_REG);
 		writeb(MTK_NOR_FAST_READ, mt8173_nor->base +
 		       MTK_NOR_CFG1_REG);
 		break;
-	case SPI_NOR_DUAL:
+	case SNOR_PROTO_1_1_2:
 		writeb(nor->read_opcode, mt8173_nor->base +
 		       MTK_NOR_PRGDATA3_REG);
 		writeb(MTK_NOR_DUAL_READ_EN, mt8173_nor->base +
 		       MTK_NOR_DUAL_REG);
 		break;
-	case SPI_NOR_QUAD:
+	case SNOR_PROTO_1_1_4:
 		writeb(nor->read_opcode, mt8173_nor->base +
 		       MTK_NOR_PRGDATA4_REG);
 		writeb(MTK_NOR_QUAD_READ_EN, mt8173_nor->base +
@@ -408,6 +408,11 @@ static int mt8173_nor_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf,
 static int mtk_nor_init(struct mt8173_nor *mt8173_nor,
 			struct device_node *flash_node)
 {
+	const struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_READ_1_1_2 |
+			SNOR_HWCAPS_PP,
+	};
 	int ret;
 	struct spi_nor *nor;
 
@@ -426,7 +431,7 @@ static int mtk_nor_init(struct mt8173_nor *mt8173_nor,
 	nor->write_reg = mt8173_nor_write_reg;
 	nor->mtd.name = "mtk_nor";
 	/* initialized with NULL */
-	ret = spi_nor_scan(nor, NULL, SPI_NOR_DUAL);
+	ret = spi_nor_scan(nor, NULL, &hwcaps);
 	if (ret)
 		return ret;
 

diff --git a/drivers/mtd/spi-nor/nxp-spifi.c b/drivers/mtd/spi-nor/nxp-spifi.c
index 73a14f4..1537421 100644
--- a/drivers/mtd/spi-nor/nxp-spifi.c
+++ b/drivers/mtd/spi-nor/nxp-spifi.c

@@ -240,13 +240,12 @@ static int nxp_spifi_erase(struct spi_nor *nor, loff_t offs)
 
 static int nxp_spifi_setup_memory_cmd(struct nxp_spifi *spifi)
 {
-	switch (spifi->nor.flash_read) {
-	case SPI_NOR_NORMAL:
-	case SPI_NOR_FAST:
+	switch (spifi->nor.read_proto) {
+	case SNOR_PROTO_1_1_1:
 		spifi->mcmd = SPIFI_CMD_FIELDFORM_ALL_SERIAL;
 		break;
-	case SPI_NOR_DUAL:
-	case SPI_NOR_QUAD:
+	case SNOR_PROTO_1_1_2:
+	case SNOR_PROTO_1_1_4:
 		spifi->mcmd = SPIFI_CMD_FIELDFORM_QUAD_DUAL_DATA;
 		break;
 	default:
@@ -274,7 +273,11 @@ static void nxp_spifi_dummy_id_read(struct spi_nor *nor)
 static int nxp_spifi_setup_flash(struct nxp_spifi *spifi,
 				 struct device_node *np)
 {
-	enum read_mode flash_read;
+	struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_PP,
+	};
 	u32 ctrl, property;
 	u16 mode = 0;
 	int ret;
@@ -308,13 +311,12 @@ static int nxp_spifi_setup_flash(struct nxp_spifi *spifi,
 
 	if (mode & SPI_RX_DUAL) {
 		ctrl |= SPIFI_CTRL_DUAL;
-		flash_read = SPI_NOR_DUAL;
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
 	} else if (mode & SPI_RX_QUAD) {
 		ctrl &= ~SPIFI_CTRL_DUAL;
-		flash_read = SPI_NOR_QUAD;
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
 	} else {
 		ctrl |= SPIFI_CTRL_DUAL;
-		flash_read = SPI_NOR_NORMAL;
 	}
 
 	switch (mode & (SPI_CPHA | SPI_CPOL)) {
@@ -351,7 +353,7 @@ static int nxp_spifi_setup_flash(struct nxp_spifi *spifi,
 	 */
 	nxp_spifi_dummy_id_read(&spifi->nor);
 
-	ret = spi_nor_scan(&spifi->nor, NULL, flash_read);
+	ret = spi_nor_scan(&spifi->nor, NULL, &hwcaps);
 	if (ret) {
 		dev_err(spifi->dev, "device scan failed\n");
 		return ret;

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index dea8c9c..1413828 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c

@@ -150,24 +150,6 @@ static int read_cr(struct spi_nor *nor)
 }
 
 /*
- * Dummy Cycle calculation for different type of read.
- * It can be used to support more commands with
- * different dummy cycle requirements.
- */
-static inline int spi_nor_read_dummy_cycles(struct spi_nor *nor)
-{
-	switch (nor->flash_read) {
-	case SPI_NOR_FAST:
-	case SPI_NOR_DUAL:
-	case SPI_NOR_QUAD:
-		return 8;
-	case SPI_NOR_NORMAL:
-		return 0;
-	}
-	return 0;
-}
-
-/*
  * Write status register 1 byte
  * Returns negative if error occurred.
  */
@@ -221,6 +203,10 @@ static inline u8 spi_nor_convert_3to4_read(u8 opcode)
 		{ SPINOR_OP_READ_1_2_2,	SPINOR_OP_READ_1_2_2_4B },
 		{ SPINOR_OP_READ_1_1_4,	SPINOR_OP_READ_1_1_4_4B },
 		{ SPINOR_OP_READ_1_4_4,	SPINOR_OP_READ_1_4_4_4B },
+
+		{ SPINOR_OP_READ_1_1_1_DTR,	SPINOR_OP_READ_1_1_1_DTR_4B },
+		{ SPINOR_OP_READ_1_2_2_DTR,	SPINOR_OP_READ_1_2_2_DTR_4B },
+		{ SPINOR_OP_READ_1_4_4_DTR,	SPINOR_OP_READ_1_4_4_DTR_4B },
 	};
 
 	return spi_nor_convert_opcode(opcode, spi_nor_3to4_read,
@@ -1022,10 +1008,12 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "mx25u6435f",  INFO(0xc22537, 0, 64 * 1024, 128, SECT_4K) },
 	{ "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, 0) },
 	{ "mx25l12855e", INFO(0xc22618, 0, 64 * 1024, 256, 0) },
-	{ "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, 0) },
+	{ "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "mx25u25635f", INFO(0xc22539, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_4B_OPCODES) },
 	{ "mx25l25655e", INFO(0xc22619, 0, 64 * 1024, 512, 0) },
-	{ "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, SPI_NOR_QUAD_READ) },
+	{ "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+	{ "mx66u51235f", INFO(0xc2253a, 0, 64 * 1024, 1024, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) },
+	{ "mx66l1g45g",  INFO(0xc2201b, 0, 64 * 1024, 2048, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "mx66l1g55g",  INFO(0xc2261b, 0, 64 * 1024, 2048, SPI_NOR_QUAD_READ) },
 
 	/* Micron */
@@ -1036,7 +1024,7 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "n25q064a",    INFO(0x20bb17, 0, 64 * 1024,  128, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q128a11",  INFO(0x20bb18, 0, 64 * 1024,  256, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024,  256, SECT_4K | SPI_NOR_QUAD_READ) },
-	{ "n25q256a",    INFO(0x20ba19, 0, 64 * 1024,  512, SECT_4K | SPI_NOR_QUAD_READ) },
+	{ "n25q256a",    INFO(0x20ba19, 0, 64 * 1024,  512, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "n25q256ax1",  INFO(0x20bb19, 0, 64 * 1024,  512, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q512a",    INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
 	{ "n25q512ax3",  INFO(0x20ba20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
@@ -1076,6 +1064,7 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "s25fl164k",  INFO(0x014017,      0,  64 * 1024, 128, SECT_4K) },
 	{ "s25fl204k",  INFO(0x014013,      0,  64 * 1024,   8, SECT_4K | SPI_NOR_DUAL_READ) },
 	{ "s25fl208k",  INFO(0x014014,      0,  64 * 1024,  16, SECT_4K | SPI_NOR_DUAL_READ) },
+	{ "s25fl064l",  INFO(0x016017,      0,  64 * 1024, 128, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) },
 
 	/* SST -- large erase sizes are "overlays", "sectors" are 4K */
 	{ "sst25vf040b", INFO(0xbf258d, 0, 64 * 1024,  8, SECT_4K | SST_WRITE) },
@@ -1159,7 +1148,9 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "w25q80", INFO(0xef5014, 0, 64 * 1024,  16, SECT_4K) },
 	{ "w25q80bl", INFO(0xef4014, 0, 64 * 1024,  16, SECT_4K) },
 	{ "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) },
-	{ "w25q256", INFO(0xef4019, 0, 64 * 1024, 512, SECT_4K) },
+	{ "w25q256", INFO(0xef4019, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+	{ "w25m512jv", INFO(0xef7119, 0, 64 * 1024, 1024,
+			SECT_4K | SPI_NOR_QUAD_READ | SPI_NOR_DUAL_READ) },
 
 	/* Catalyst / On Semiconductor -- non-JEDEC */
 	{ "cat25c11", CAT25_INFO(  16, 8, 16, 1, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
@@ -1403,8 +1394,9 @@ static int macronix_quad_enable(struct spi_nor *nor)
 
 	write_sr(nor, val | SR_QUAD_EN_MX);
 
-	if (spi_nor_wait_till_ready(nor))
-		return 1;
+	ret = spi_nor_wait_till_ready(nor);
+	if (ret)
+		return ret;
 
 	ret = read_sr(nor);
 	if (!(ret > 0 && (ret & SR_QUAD_EN_MX))) {
@@ -1460,30 +1452,6 @@ static int spansion_quad_enable(struct spi_nor *nor)
 	return 0;
 }
 
-static int set_quad_mode(struct spi_nor *nor, const struct flash_info *info)
-{
-	int status;
-
-	switch (JEDEC_MFR(info)) {
-	case SNOR_MFR_MACRONIX:
-		status = macronix_quad_enable(nor);
-		if (status) {
-			dev_err(nor->dev, "Macronix quad-read not enabled\n");
-			return -EINVAL;
-		}
-		return status;
-	case SNOR_MFR_MICRON:
-		return 0;
-	default:
-		status = spansion_quad_enable(nor);
-		if (status) {
-			dev_err(nor->dev, "Spansion quad-read not enabled\n");
-			return -EINVAL;
-		}
-		return status;
-	}
-}
-
 static int spi_nor_check(struct spi_nor *nor)
 {
 	if (!nor->dev || !nor->read || !nor->write ||
@@ -1536,8 +1504,349 @@ static int s3an_nor_scan(const struct flash_info *info, struct spi_nor *nor)
 	return 0;
 }
 
-int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
+struct spi_nor_read_command {
+	u8			num_mode_clocks;	/* mode clock cycles */
+	u8			num_wait_states;	/* wait state clock cycles */
+	u8			opcode;			/* read opcode */
+	enum spi_nor_protocol	proto;			/* protocol for @opcode */
+};
+
+struct spi_nor_pp_command {
+	u8			opcode;	/* page program opcode */
+	enum spi_nor_protocol	proto;	/* protocol for @opcode */
+};
+
+enum spi_nor_read_command_index {	/* index into the reads[] array */
+	SNOR_CMD_READ,
+	SNOR_CMD_READ_FAST,
+	SNOR_CMD_READ_1_1_1_DTR,
+
+	/* Dual SPI */
+	SNOR_CMD_READ_1_1_2,
+	SNOR_CMD_READ_1_2_2,
+	SNOR_CMD_READ_2_2_2,
+	SNOR_CMD_READ_1_2_2_DTR,
+
+	/* Quad SPI */
+	SNOR_CMD_READ_1_1_4,
+	SNOR_CMD_READ_1_4_4,
+	SNOR_CMD_READ_4_4_4,
+	SNOR_CMD_READ_1_4_4_DTR,
+
+	/* Octo SPI */
+	SNOR_CMD_READ_1_1_8,
+	SNOR_CMD_READ_1_8_8,
+	SNOR_CMD_READ_8_8_8,
+	SNOR_CMD_READ_1_8_8_DTR,
+
+	SNOR_CMD_READ_MAX	/* number of read commands; sizes reads[] */
+};
+
+enum spi_nor_pp_command_index {	/* index into the page_programs[] array */
+	SNOR_CMD_PP,
+
+	/* Quad SPI */
+	SNOR_CMD_PP_1_1_4,
+	SNOR_CMD_PP_1_4_4,
+	SNOR_CMD_PP_4_4_4,
+
+	/* Octo SPI */
+	SNOR_CMD_PP_1_1_8,
+	SNOR_CMD_PP_1_8_8,
+	SNOR_CMD_PP_8_8_8,
+
+	SNOR_CMD_PP_MAX	/* number of page program commands */
+};
+
+struct spi_nor_flash_parameter {
+	u64				size;		/* sector_size * n_sectors */
+	u32				page_size;	/* copied from flash_info */
+
+	struct spi_nor_hwcaps		hwcaps;	/* what the flash supports */
+	struct spi_nor_read_command	reads[SNOR_CMD_READ_MAX];
+	struct spi_nor_pp_command	page_programs[SNOR_CMD_PP_MAX];
+
+	int (*quad_enable)(struct spi_nor *nor);	/* may be left NULL */
+};
+
+static void /* Fill @read with the given settings; nothing is validated. */
+spi_nor_set_read_settings(struct spi_nor_read_command *read,
+			  u8 num_mode_clocks,
+			  u8 num_wait_states,
+			  u8 opcode,
+			  enum spi_nor_protocol proto)
+{
+	read->num_mode_clocks = num_mode_clocks;
+	read->num_wait_states = num_wait_states;
+	read->opcode = opcode;
+	read->proto = proto;
+}
+
+static void /* Fill @pp with the given settings; nothing is validated. */
+spi_nor_set_pp_settings(struct spi_nor_pp_command *pp,
+			u8 opcode,
+			enum spi_nor_protocol proto)
+{
+	pp->opcode = opcode;
+	pp->proto = proto;
+}
+
+static int spi_nor_init_params(struct spi_nor *nor,
+			       const struct flash_info *info,
+			       struct spi_nor_flash_parameter *params)
+{
+	/* Set legacy flash parameters as default. */
+	memset(params, 0, sizeof(*params));	/* also clears quad_enable */
+
+	/* Set SPI NOR sizes. */
+	params->size = info->sector_size * info->n_sectors;
+	params->page_size = info->page_size;
+
+	/* (Fast) Read settings. */
+	params->hwcaps.mask |= SNOR_HWCAPS_READ;
+	spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ],
+				  0, 0, SPINOR_OP_READ,	/* no dummy cycles */
+				  SNOR_PROTO_1_1_1);
+
+	if (!(info->flags & SPI_NOR_NO_FR)) {
+		params->hwcaps.mask |= SNOR_HWCAPS_READ_FAST;
+		spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_FAST],
+					  0, 8, SPINOR_OP_READ_FAST, /* 8 wait */
+					  SNOR_PROTO_1_1_1);
+	}
+
+	if (info->flags & SPI_NOR_DUAL_READ) {
+		params->hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
+		spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_1_1_2],
+					  0, 8, SPINOR_OP_READ_1_1_2,
+					  SNOR_PROTO_1_1_2);
+	}
+
+	if (info->flags & SPI_NOR_QUAD_READ) {
+		params->hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
+		spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_1_1_4],
+					  0, 8, SPINOR_OP_READ_1_1_4,
+					  SNOR_PROTO_1_1_4);
+	}
+
+	/* Page Program settings. */
+	params->hwcaps.mask |= SNOR_HWCAPS_PP;
+	spi_nor_set_pp_settings(&params->page_programs[SNOR_CMD_PP],
+				SPINOR_OP_PP, SNOR_PROTO_1_1_1);
+
+	/* Select the procedure to set the Quad Enable bit. */
+	if (params->hwcaps.mask & (SNOR_HWCAPS_READ_QUAD |
+				   SNOR_HWCAPS_PP_QUAD)) {
+		switch (JEDEC_MFR(info)) {
+		case SNOR_MFR_MACRONIX:
+			params->quad_enable = macronix_quad_enable;
+			break;
+
+		case SNOR_MFR_MICRON:
+			break;	/* quad_enable stays NULL for Micron */
+
+		default:
+			params->quad_enable = spansion_quad_enable;
+			break;
+		}
+	}
+
+	return 0;	/* no failure path in this function */
+}
+
+static int spi_nor_hwcaps2cmd(u32 hwcaps, const int table[][2], size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < size; i++)	/* linear scan over @size rows */
+		if (table[i][0] == (int)hwcaps)	/* exact match on column 0 */
+			return table[i][1];	/* column 1 of the matching row */
+
+	return -EINVAL;	/* @hwcaps has no row in @table */
+}
+
+static int spi_nor_hwcaps_read2cmd(u32 hwcaps)
+{
+	static const int hwcaps_read2cmd[][2] = { /* { hwcaps, read cmd index } */
+		{ SNOR_HWCAPS_READ,		SNOR_CMD_READ },
+		{ SNOR_HWCAPS_READ_FAST,	SNOR_CMD_READ_FAST },
+		{ SNOR_HWCAPS_READ_1_1_1_DTR,	SNOR_CMD_READ_1_1_1_DTR },
+		{ SNOR_HWCAPS_READ_1_1_2,	SNOR_CMD_READ_1_1_2 },
+		{ SNOR_HWCAPS_READ_1_2_2,	SNOR_CMD_READ_1_2_2 },
+		{ SNOR_HWCAPS_READ_2_2_2,	SNOR_CMD_READ_2_2_2 },
+		{ SNOR_HWCAPS_READ_1_2_2_DTR,	SNOR_CMD_READ_1_2_2_DTR },
+		{ SNOR_HWCAPS_READ_1_1_4,	SNOR_CMD_READ_1_1_4 },
+		{ SNOR_HWCAPS_READ_1_4_4,	SNOR_CMD_READ_1_4_4 },
+		{ SNOR_HWCAPS_READ_4_4_4,	SNOR_CMD_READ_4_4_4 },
+		{ SNOR_HWCAPS_READ_1_4_4_DTR,	SNOR_CMD_READ_1_4_4_DTR },
+		{ SNOR_HWCAPS_READ_1_1_8,	SNOR_CMD_READ_1_1_8 },
+		{ SNOR_HWCAPS_READ_1_8_8,	SNOR_CMD_READ_1_8_8 },
+		{ SNOR_HWCAPS_READ_8_8_8,	SNOR_CMD_READ_8_8_8 },
+		{ SNOR_HWCAPS_READ_1_8_8_DTR,	SNOR_CMD_READ_1_8_8_DTR },
+	};
+
+	return spi_nor_hwcaps2cmd(hwcaps, hwcaps_read2cmd, /* -EINVAL if absent */
+				  ARRAY_SIZE(hwcaps_read2cmd));
+}
+
+/*
+ * Map one SNOR_HWCAPS_PP* capability value to its SNOR_CMD_PP* index.
+ *
+ * @hwcaps must equal exactly one of the capability values listed in the
+ * table below; the lookup is an equality test, so a mask combining several
+ * capabilities does not match. Returns the command index, or -EINVAL when
+ * @hwcaps is not in the table.
+ */
+static int spi_nor_hwcaps_pp2cmd(u32 hwcaps)
+{
+	/* { capability value, index of the matching page program command } */
+	static const int hwcaps_pp2cmd[][2] = {
+		{ SNOR_HWCAPS_PP,		SNOR_CMD_PP },
+		{ SNOR_HWCAPS_PP_1_1_4,		SNOR_CMD_PP_1_1_4 },
+		{ SNOR_HWCAPS_PP_1_4_4,		SNOR_CMD_PP_1_4_4 },
+		{ SNOR_HWCAPS_PP_4_4_4,		SNOR_CMD_PP_4_4_4 },
+		{ SNOR_HWCAPS_PP_1_1_8,		SNOR_CMD_PP_1_1_8 },
+		{ SNOR_HWCAPS_PP_1_8_8,		SNOR_CMD_PP_1_8_8 },
+		{ SNOR_HWCAPS_PP_8_8_8,		SNOR_CMD_PP_8_8_8 },
+	};
+
+	return spi_nor_hwcaps2cmd(hwcaps, hwcaps_pp2cmd,
+				  ARRAY_SIZE(hwcaps_pp2cmd));
+}
+
+/*
+ * Pick the read command for @nor.
+ *
+ * The highest bit set in @shared_hwcaps, restricted to
+ * SNOR_HWCAPS_READ_MASK, selects an entry of @params->reads; that entry's
+ * opcode, protocol and dummy cycle count are copied into @nor.
+ *
+ * Returns 0 on success, or -EINVAL when no read capability is shared or
+ * the selected capability has no matching read command.
+ */
+static int spi_nor_select_read(struct spi_nor *nor,
+			       const struct spi_nor_flash_parameter *params,
+			       u32 shared_hwcaps)
+{
+	const struct spi_nor_read_command *read;
+	int top_bit, idx;
+
+	/* A negative result means no read capability bit is set. */
+	top_bit = fls(shared_hwcaps & SNOR_HWCAPS_READ_MASK) - 1;
+	if (top_bit < 0)
+		return -EINVAL;
+
+	idx = spi_nor_hwcaps_read2cmd(BIT(top_bit));
+	if (idx < 0)
+		return -EINVAL;
+
+	read = &params->reads[idx];
+
+	/*
+	 * The spi-nor framework does not distinguish mode clock cycles from
+	 * wait state clock cycles. The mode cycles only tell a QSPI flash
+	 * memory whether to enter or leave its 0-4-4 (Continuous Read / XIP)
+	 * mode, and eXecution In Place is out of the scope of the mtd
+	 * sub-system. Both counts are therefore merged into the so called
+	 * dummy clock cycles.
+	 */
+	nor->read_dummy = read->num_mode_clocks + read->num_wait_states;
+	nor->read_proto = read->proto;
+	nor->read_opcode = read->opcode;
+
+	return 0;
+}
+
+/*
+ * Pick the page program command for @nor.
+ *
+ * The highest bit set in @shared_hwcaps, restricted to SNOR_HWCAPS_PP_MASK,
+ * selects an entry of @params->page_programs; that entry's opcode and
+ * protocol are copied into @nor.
+ *
+ * Returns 0 on success, or -EINVAL when no page program capability is
+ * shared or the selected capability has no matching command.
+ */
+static int spi_nor_select_pp(struct spi_nor *nor,
+			     const struct spi_nor_flash_parameter *params,
+			     u32 shared_hwcaps)
+{
+	const struct spi_nor_pp_command *pp;
+	int top_bit, idx;
+
+	/* A negative result means no page program capability bit is set. */
+	top_bit = fls(shared_hwcaps & SNOR_HWCAPS_PP_MASK) - 1;
+	if (top_bit < 0)
+		return -EINVAL;
+
+	idx = spi_nor_hwcaps_pp2cmd(BIT(top_bit));
+	if (idx < 0)
+		return -EINVAL;
+
+	pp = &params->page_programs[idx];
+	nor->write_proto = pp->proto;
+	nor->program_opcode = pp->opcode;
+
+	return 0;
+}
+
+/*
+ * Choose the erase opcode and the MTD erase size for @nor.
+ *
+ * When CONFIG_MTD_SPI_NOR_USE_4K_SECTORS is enabled and @info carries the
+ * SECT_4K or SECT_4K_PMC flag, the matching 4 KiB erase opcode is used with
+ * an erase size of 4096. In every other case the sector erase opcode is
+ * used with @info->sector_size.
+ *
+ * Always returns 0.
+ */
+static int spi_nor_select_erase(struct spi_nor *nor,
+				const struct flash_info *info)
+{
+	struct mtd_info *mtd = &nor->mtd;
+
+#ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS
+	/* prefer "small sector" erase if possible */
+	if (info->flags & SECT_4K) {
+		nor->erase_opcode = SPINOR_OP_BE_4K;
+		mtd->erasesize = 4096;
+	} else if (info->flags & SECT_4K_PMC) {
+		nor->erase_opcode = SPINOR_OP_BE_4K_PMC;
+		mtd->erasesize = 4096;
+	} else
+#endif
+	/*
+	 * This block is the trailing "else" branch when the option above is
+	 * enabled, and an unconditional block when it is not.
+	 */
+	{
+		nor->erase_opcode = SPINOR_OP_SE;
+		mtd->erasesize = info->sector_size;
+	}
+	return 0;
+}
+
+/*
+ * Configure @nor from the capabilities common to the controller and flash.
+ *
+ * @hwcaps describes the SPI controller, @params the flash memory. The
+ * read, page program and erase settings are selected in that order, then
+ * the flash's quad_enable hook (if any) is called when either the selected
+ * read or write protocol has a width of 4.
+ *
+ * Returns 0 on success or the error code of the first step that failed.
+ */
+static int spi_nor_setup(struct spi_nor *nor, const struct flash_info *info,
+			 const struct spi_nor_flash_parameter *params,
+			 const struct spi_nor_hwcaps *hwcaps)
+{
+	/* SPI n-n-n protocols are not supported yet. */
+	const u32 unsupported = SNOR_HWCAPS_READ_2_2_2 |
+				SNOR_HWCAPS_READ_4_4_4 |
+				SNOR_HWCAPS_READ_8_8_8 |
+				SNOR_HWCAPS_PP_4_4_4 |
+				SNOR_HWCAPS_PP_8_8_8;
+	u32 common;
+	int ret;
+
+	/*
+	 * Only the hardware capabilities offered by both the SPI controller
+	 * and the SPI flash memory can be used.
+	 */
+	common = hwcaps->mask & params->hwcaps.mask;
+
+	if (common & unsupported) {
+		dev_dbg(nor->dev,
+			"SPI n-n-n protocols are not supported yet.\n");
+		common &= ~unsupported;
+	}
+
+	/* (Fast) Read command. */
+	ret = spi_nor_select_read(nor, params, common);
+	if (ret) {
+		dev_err(nor->dev,
+			"can't select read settings supported by both the SPI controller and memory.\n");
+		return ret;
+	}
+
+	/* Page Program command. */
+	ret = spi_nor_select_pp(nor, params, common);
+	if (ret) {
+		dev_err(nor->dev,
+			"can't select write settings supported by both the SPI controller and memory.\n");
+		return ret;
+	}
+
+	/* Sector Erase command. */
+	ret = spi_nor_select_erase(nor, info);
+	if (ret) {
+		dev_err(nor->dev,
+			"can't select erase settings supported by both the SPI controller and memory.\n");
+		return ret;
+	}
+
+	/* Quad I/O is only needed when a selected protocol is 4 bits wide. */
+	if (spi_nor_get_protocol_width(nor->read_proto) != 4 &&
+	    spi_nor_get_protocol_width(nor->write_proto) != 4)
+		return 0;
+
+	/* No procedure registered for setting the Quad Enable bit. */
+	if (!params->quad_enable)
+		return 0;
+
+	ret = params->quad_enable(nor);
+	if (ret)
+		dev_err(nor->dev, "quad mode not supported\n");
+
+	return ret;
+}
+
+int spi_nor_scan(struct spi_nor *nor, const char *name,
+		 const struct spi_nor_hwcaps *hwcaps)
+{
+	struct spi_nor_flash_parameter params;
 	const struct flash_info *info = NULL;
 	struct device *dev = nor->dev;
 	struct mtd_info *mtd = &nor->mtd;
@@ -1549,6 +1858,11 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	if (ret)
 		return ret;
 
+	/* Reset SPI protocol for all commands. */
+	nor->reg_proto = SNOR_PROTO_1_1_1;
+	nor->read_proto = SNOR_PROTO_1_1_1;
+	nor->write_proto = SNOR_PROTO_1_1_1;
+
 	if (name)
 		info = spi_nor_match_id(name);
 	/* Try to auto-detect if chip name wasn't specified or not found */
@@ -1591,6 +1905,11 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	if (info->flags & SPI_S3AN)
 		nor->flags |=  SNOR_F_READY_XSR_RDY;
 
+	/* Parse the Serial Flash Discoverable Parameters table. */
+	ret = spi_nor_init_params(nor, info, &params);
+	if (ret)
+		return ret;
+
 	/*
 	 * Atmel, SST, Intel/Numonyx, and others serial NOR tend to power up
 	 * with the software protection bits set
@@ -1611,7 +1930,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	mtd->type = MTD_NORFLASH;
 	mtd->writesize = 1;
 	mtd->flags = MTD_CAP_NORFLASH;
-	mtd->size = info->sector_size * info->n_sectors;
+	mtd->size = params.size;
 	mtd->_erase = spi_nor_erase;
 	mtd->_read = spi_nor_read;
 
@@ -1642,75 +1961,38 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	if (info->flags & NO_CHIP_ERASE)
 		nor->flags |= SNOR_F_NO_OP_CHIP_ERASE;
 
-#ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS
-	/* prefer "small sector" erase if possible */
-	if (info->flags & SECT_4K) {
-		nor->erase_opcode = SPINOR_OP_BE_4K;
-		mtd->erasesize = 4096;
-	} else if (info->flags & SECT_4K_PMC) {
-		nor->erase_opcode = SPINOR_OP_BE_4K_PMC;
-		mtd->erasesize = 4096;
-	} else
-#endif
-	{
-		nor->erase_opcode = SPINOR_OP_SE;
-		mtd->erasesize = info->sector_size;
-	}
-
 	if (info->flags & SPI_NOR_NO_ERASE)
 		mtd->flags |= MTD_NO_ERASE;
 
 	mtd->dev.parent = dev;
-	nor->page_size = info->page_size;
+	nor->page_size = params.page_size;
 	mtd->writebufsize = nor->page_size;
 
 	if (np) {
 		/* If we were instantiated by DT, use it */
 		if (of_property_read_bool(np, "m25p,fast-read"))
-			nor->flash_read = SPI_NOR_FAST;
+			params.hwcaps.mask |= SNOR_HWCAPS_READ_FAST;
 		else
-			nor->flash_read = SPI_NOR_NORMAL;
+			params.hwcaps.mask &= ~SNOR_HWCAPS_READ_FAST;
 	} else {
 		/* If we weren't instantiated by DT, default to fast-read */
-		nor->flash_read = SPI_NOR_FAST;
+		params.hwcaps.mask |= SNOR_HWCAPS_READ_FAST;
 	}
 
 	/* Some devices cannot do fast-read, no matter what DT tells us */
 	if (info->flags & SPI_NOR_NO_FR)
-		nor->flash_read = SPI_NOR_NORMAL;
+		params.hwcaps.mask &= ~SNOR_HWCAPS_READ_FAST;
 
-	/* Quad/Dual-read mode takes precedence over fast/normal */
-	if (mode == SPI_NOR_QUAD && info->flags & SPI_NOR_QUAD_READ) {
-		ret = set_quad_mode(nor, info);
-		if (ret) {
-			dev_err(dev, "quad mode not supported\n");
-			return ret;
-		}
-		nor->flash_read = SPI_NOR_QUAD;
-	} else if (mode == SPI_NOR_DUAL && info->flags & SPI_NOR_DUAL_READ) {
-		nor->flash_read = SPI_NOR_DUAL;
-	}
-
-	/* Default commands */
-	switch (nor->flash_read) {
-	case SPI_NOR_QUAD:
-		nor->read_opcode = SPINOR_OP_READ_1_1_4;
-		break;
-	case SPI_NOR_DUAL:
-		nor->read_opcode = SPINOR_OP_READ_1_1_2;
-		break;
-	case SPI_NOR_FAST:
-		nor->read_opcode = SPINOR_OP_READ_FAST;
-		break;
-	case SPI_NOR_NORMAL:
-		nor->read_opcode = SPINOR_OP_READ;
-		break;
-	default:
-		dev_err(dev, "No Read opcode defined\n");
-		return -EINVAL;
-	}
-
-	nor->program_opcode = SPINOR_OP_PP;
+	/*
+	 * Configure the SPI memory:
+	 * - select op codes for (Fast) Read, Page Program and Sector Erase.
+	 * - set the number of dummy cycles (mode cycles + wait states).
+	 * - set the SPI protocols for register and memory accesses.
+	 * - set the Quad Enable bit if needed (required by SPI x-y-4 protos).
+	 */
+	ret = spi_nor_setup(nor, info, &params, hwcaps);
+	if (ret)
+		return ret;
 
 	if (info->addr_width)
 		nor->addr_width = info->addr_width;
@@ -1732,8 +2014,6 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 		return -EINVAL;
 	}
 
-	nor->read_dummy = spi_nor_read_dummy_cycles(nor);
-
 	if (info->flags & SPI_S3AN) {
 		ret = s3an_nor_scan(info, nor);
 		if (ret)

diff --git a/drivers/mtd/spi-nor/stm32-quadspi.c b/drivers/mtd/spi-nor/stm32-quadspi.c
index ae45f81..86c0931 100644
--- a/drivers/mtd/spi-nor/stm32-quadspi.c
+++ b/drivers/mtd/spi-nor/stm32-quadspi.c

@@ -19,6 +19,7 @@
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
+#include <linux/sizes.h>
 
 #define QUADSPI_CR		0x00
 #define CR_EN			BIT(0)
@@ -192,15 +193,15 @@ static void stm32_qspi_set_framemode(struct spi_nor *nor,
 	cmd->framemode = CCR_IMODE_1;
 
 	if (read) {
-		switch (nor->flash_read) {
-		case SPI_NOR_NORMAL:
-		case SPI_NOR_FAST:
+		switch (nor->read_proto) {
+		default:
+		case SNOR_PROTO_1_1_1:
 			dmode = CCR_DMODE_1;
 			break;
-		case SPI_NOR_DUAL:
+		case SNOR_PROTO_1_1_2:
 			dmode = CCR_DMODE_2;
 			break;
-		case SPI_NOR_QUAD:
+		case SNOR_PROTO_1_1_4:
 			dmode = CCR_DMODE_4;
 			break;
 		}
@@ -375,7 +376,7 @@ static ssize_t stm32_qspi_read(struct spi_nor *nor, loff_t from, size_t len,
 	struct stm32_qspi_cmd cmd;
 	int err;
 
-	dev_dbg(qspi->dev, "read(%#.2x): buf:%p from:%#.8x len:%#x\n",
+	dev_dbg(qspi->dev, "read(%#.2x): buf:%p from:%#.8x len:%#zx\n",
 		nor->read_opcode, buf, (u32)from, len);
 
 	memset(&cmd, 0, sizeof(cmd));
@@ -402,7 +403,7 @@ static ssize_t stm32_qspi_write(struct spi_nor *nor, loff_t to, size_t len,
 	struct stm32_qspi_cmd cmd;
 	int err;
 
-	dev_dbg(dev, "write(%#.2x): buf:%p to:%#.8x len:%#x\n",
+	dev_dbg(dev, "write(%#.2x): buf:%p to:%#.8x len:%#zx\n",
 		nor->program_opcode, buf, (u32)to, len);
 
 	memset(&cmd, 0, sizeof(cmd));
@@ -480,7 +481,12 @@ static void stm32_qspi_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
 static int stm32_qspi_flash_setup(struct stm32_qspi *qspi,
 				  struct device_node *np)
 {
-	u32 width, flash_read, presc, cs_num, max_rate = 0;
+	struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_PP,
+	};
+	u32 width, presc, cs_num, max_rate = 0;
 	struct stm32_qspi_flash *flash;
 	struct mtd_info *mtd;
 	int ret;
@@ -499,12 +505,10 @@ static int stm32_qspi_flash_setup(struct stm32_qspi *qspi,
 		width = 1;
 
 	if (width == 4)
-		flash_read = SPI_NOR_QUAD;
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
 	else if (width == 2)
-		flash_read = SPI_NOR_DUAL;
-	else if (width == 1)
-		flash_read = SPI_NOR_NORMAL;
-	else
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
+	else if (width != 1)
 		return -EINVAL;
 
 	flash = &qspi->flash[cs_num];
@@ -539,7 +543,7 @@ static int stm32_qspi_flash_setup(struct stm32_qspi *qspi,
 	 */
 	flash->fsize = FSIZE_VAL(SZ_1K);
 
-	ret = spi_nor_scan(&flash->nor, NULL, flash_read);
+	ret = spi_nor_scan(&flash->nor, NULL, &hwcaps);
 	if (ret) {
 		dev_err(qspi->dev, "device scan failed\n");
 		return ret;

diff --git a/drivers/mtd/tests/subpagetest.c b/drivers/mtd/tests/subpagetest.c
index aecc6ce..fa2519a 100644
--- a/drivers/mtd/tests/subpagetest.c
+++ b/drivers/mtd/tests/subpagetest.c

@@ -102,7 +102,7 @@ static int write_eraseblock2(int ebnum)
 		if (unlikely(err || written != subpgsize * k)) {
 			pr_err("error: write failed at %#llx\n",
 			       (long long)addr);
-			if (written != subpgsize) {
+			if (written != subpgsize * k) {
 				pr_err("  write size: %#x\n",
 				       subpgsize * k);
 				pr_err("  written: %#08zx\n",

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index a19f68f..e7c8539 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c

@@ -3458,13 +3458,18 @@ static int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp)
 	req.ver_upd = DRV_VER_UPD;
 
 	if (BNXT_PF(bp)) {
-		DECLARE_BITMAP(vf_req_snif_bmap, 256);
-		u32 *data = (u32 *)vf_req_snif_bmap;
+		u32 data[8];
 		int i;
 
-		memset(vf_req_snif_bmap, 0, sizeof(vf_req_snif_bmap));
-		for (i = 0; i < ARRAY_SIZE(bnxt_vf_req_snif); i++)
-			__set_bit(bnxt_vf_req_snif[i], vf_req_snif_bmap);
+		memset(data, 0, sizeof(data));
+		for (i = 0; i < ARRAY_SIZE(bnxt_vf_req_snif); i++) {
+			u16 cmd = bnxt_vf_req_snif[i];
+			unsigned int bit, idx;
+
+			idx = cmd / 32;
+			bit = cmd % 32;
+			data[idx] |= 1 << bit;
+		}
 
 		for (i = 0; i < 8; i++)
 			req.vf_req_fwd[i] = cpu_to_le32(data[i]);
@@ -6279,6 +6284,12 @@ static int bnxt_open(struct net_device *dev)
 	return __bnxt_open_nic(bp, true, true);
 }
 
+/*
+ * Tell whether @bp still has work in flight that bnxt_close_nic() must
+ * wait for: either BNXT_STATE_IN_SP_TASK or BNXT_STATE_READ_STATS is set
+ * in bp->state.
+ */
+static bool bnxt_drv_busy(struct bnxt *bp)
+{
+	if (test_bit(BNXT_STATE_IN_SP_TASK, &bp->state))
+		return true;
+
+	return test_bit(BNXT_STATE_READ_STATS, &bp->state);
+}
+
 int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 {
 	int rc = 0;
@@ -6297,7 +6308,7 @@ int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 
 	clear_bit(BNXT_STATE_OPEN, &bp->state);
 	smp_mb__after_atomic();
-	while (test_bit(BNXT_STATE_IN_SP_TASK, &bp->state))
+	while (bnxt_drv_busy(bp))
 		msleep(20);
 
 	/* Flush rings and and disable interrupts */
@@ -6358,8 +6369,15 @@ bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 	u32 i;
 	struct bnxt *bp = netdev_priv(dev);
 
-	if (!bp->bnapi)
+	set_bit(BNXT_STATE_READ_STATS, &bp->state);
+	/* Make sure bnxt_close_nic() sees that we are reading stats before
+	 * we check the BNXT_STATE_OPEN flag.
+	 */
+	smp_mb__after_atomic();
+	if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
+		clear_bit(BNXT_STATE_READ_STATS, &bp->state);
 		return;
+	}
 
 	/* TODO check if we need to synchronize with bnxt_close path */
 	for (i = 0; i < bp->cp_nr_rings; i++) {
@@ -6406,6 +6424,7 @@ bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 		stats->tx_fifo_errors = le64_to_cpu(tx->tx_fifo_underruns);
 		stats->tx_errors = le64_to_cpu(tx->tx_err);
 	}
+	clear_bit(BNXT_STATE_READ_STATS, &bp->state);
 }
 
 static bool bnxt_mc_list_updated(struct bnxt *bp, u32 *rx_mask)
@@ -6904,16 +6923,13 @@ static void bnxt_sp_task(struct work_struct *work)
 }
 
 /* Under rtnl_lock */
-int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, int tcs, int tx_xdp)
+int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
+		       int tx_xdp)
 {
 	int max_rx, max_tx, tx_sets = 1;
 	int tx_rings_needed;
-	bool sh = true;
 	int rc;
 
-	if (!(bp->flags & BNXT_FLAG_SHARED_RINGS))
-		sh = false;
-
 	if (tcs)
 		tx_sets = tcs;
 
@@ -7121,7 +7137,7 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
 		sh = true;
 
 	rc = bnxt_reserve_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings,
-				tc, bp->tx_nr_rings_xdp);
+				sh, tc, bp->tx_nr_rings_xdp);
 	if (rc)
 		return rc;
 

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index f872a7d..f34691f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h

@@ -1117,6 +1117,7 @@ struct bnxt {
 	unsigned long		state;
 #define BNXT_STATE_OPEN		0
 #define BNXT_STATE_IN_SP_TASK	1
+#define BNXT_STATE_READ_STATS	2
 
 	struct bnxt_irq	*irq_tbl;
 	int			total_irqs;
@@ -1300,7 +1301,8 @@ int bnxt_open_nic(struct bnxt *, bool, bool);
 int bnxt_half_open_nic(struct bnxt *bp);
 void bnxt_half_close_nic(struct bnxt *bp);
 int bnxt_close_nic(struct bnxt *, bool, bool);
-int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, int tcs, int tx_xdp);
+int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
+		       int tx_xdp);
 int bnxt_setup_mq_tc(struct net_device *dev, u8 tc);
 int bnxt_get_max_rings(struct bnxt *, int *, int *, bool);
 void bnxt_restore_pf_fw_resources(struct bnxt *bp);

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index fd11815..be6acad 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c

@@ -432,7 +432,8 @@ static int bnxt_set_channels(struct net_device *dev,
 		}
 		tx_xdp = req_rx_rings;
 	}
-	rc = bnxt_reserve_rings(bp, req_tx_rings, req_rx_rings, tcs, tx_xdp);
+	rc = bnxt_reserve_rings(bp, req_tx_rings, req_rx_rings, sh, tcs,
+				tx_xdp);
 	if (rc) {
 		netdev_warn(dev, "Unable to allocate the requested rings\n");
 		return rc;

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 7d67552..3961a68 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c

@@ -170,7 +170,7 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
 	if (!tc)
 		tc = 1;
 	rc = bnxt_reserve_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings,
-				tc, tx_xdp);
+				true, tc, tx_xdp);
 	if (rc) {
 		netdev_warn(dev, "Unable to reserve enough TX rings to support XDP.\n");
 		return rc;

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 86f92e3..e403fa1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c

@@ -2083,12 +2083,12 @@ static void detach_ulds(struct adapter *adap)
 
 	mutex_lock(&uld_mutex);
 	list_del(&adap->list_node);
+
 	for (i = 0; i < CXGB4_ULD_MAX; i++)
-		if (adap->uld && adap->uld[i].handle) {
+		if (adap->uld && adap->uld[i].handle)
 			adap->uld[i].state_change(adap->uld[i].handle,
 					     CXGB4_STATE_DETACH);
-			adap->uld[i].handle = NULL;
-		}
+
 	if (netevent_registered && list_empty(&adapter_list)) {
 		unregister_netevent_notifier(&cxgb4_netevent_nb);
 		netevent_registered = false;
@@ -5303,8 +5303,10 @@ static void remove_one(struct pci_dev *pdev)
 		 */
 		destroy_workqueue(adapter->workq);
 
-		if (is_uld(adapter))
+		if (is_uld(adapter)) {
 			detach_ulds(adapter);
+			t4_uld_clean_up(adapter);
+		}
 
 		disable_interrupts(adapter);
 
@@ -5385,7 +5387,11 @@ static void shutdown_one(struct pci_dev *pdev)
 			if (adapter->port[i]->reg_state == NETREG_REGISTERED)
 				cxgb_close(adapter->port[i]);
 
-		t4_uld_clean_up(adapter);
+		if (is_uld(adapter)) {
+			detach_ulds(adapter);
+			t4_uld_clean_up(adapter);
+		}
+
 		disable_interrupts(adapter);
 		disable_msi(adapter);
 

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index ec53fe9..71a315b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c

@@ -589,22 +589,37 @@ void t4_uld_mem_free(struct adapter *adap)
 	kfree(adap->uld);
 }
 
+/*
+ * Release the resources @adap holds for the upper-layer driver @type.
+ * Nothing is done when no handle is set for @type.
+ *
+ * This function should be called with uld_mutex taken.
+ */
+static void cxgb4_shutdown_uld_adapter(struct adapter *adap, enum cxgb4_uld type)
+{
+	if (!adap->uld[type].handle)
+		return;
+
+	/* Clear the handle and add fields before releasing the queues. */
+	adap->uld[type].handle = NULL;
+	adap->uld[type].add = NULL;
+	release_sge_txq_uld(adap, type);
+
+	if (adap->flags & FULL_INIT_DONE)
+		quiesce_rx_uld(adap, type);
+
+	if (adap->flags & USING_MSIX)
+		free_msix_queue_irqs_uld(adap, type);
+
+	free_sge_queues_uld(adap, type);
+	free_queues_uld(adap, type);
+}
+
 void t4_uld_clean_up(struct adapter *adap)
 {
 	unsigned int i;
 
-	if (!adap->uld)
-		return;
+	mutex_lock(&uld_mutex);
 	for (i = 0; i < CXGB4_ULD_MAX; i++) {
 		if (!adap->uld[i].handle)
 			continue;
-		if (adap->flags & FULL_INIT_DONE)
-			quiesce_rx_uld(adap, i);
-		if (adap->flags & USING_MSIX)
-			free_msix_queue_irqs_uld(adap, i);
-		free_sge_queues_uld(adap, i);
-		free_queues_uld(adap, i);
+
+		cxgb4_shutdown_uld_adapter(adap, i);
 	}
+	mutex_unlock(&uld_mutex);
 }
 
 static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
@@ -783,15 +798,8 @@ int cxgb4_unregister_uld(enum cxgb4_uld type)
 			continue;
 		if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip))
 			continue;
-		adap->uld[type].handle = NULL;
-		adap->uld[type].add = NULL;
-		release_sge_txq_uld(adap, type);
-		if (adap->flags & FULL_INIT_DONE)
-			quiesce_rx_uld(adap, type);
-		if (adap->flags & USING_MSIX)
-			free_msix_queue_irqs_uld(adap, type);
-		free_sge_queues_uld(adap, type);
-		free_queues_uld(adap, type);
+
+		cxgb4_shutdown_uld_adapter(adap, type);
 	}
 	mutex_unlock(&uld_mutex);
 

diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c
index 1841ad4..39bad67 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c

@@ -402,8 +402,8 @@ static int vnic_dev_init_devcmd2(struct vnic_dev *vdev)
 	fetch_index = ioread32(&vdev->devcmd2->wq.ctrl->fetch_index);
 	if (fetch_index == 0xFFFFFFFF) { /* check for hardware gone  */
 		vdev_err(vdev, "Fatal error in devcmd2 init - hardware surprise removal\n");
-
-		return -ENODEV;
+		err = -ENODEV;
+		goto err_free_wq;
 	}
 
 	enic_wq_init_start(&vdev->devcmd2->wq, 0, fetch_index, fetch_index, 0,
@@ -414,7 +414,7 @@ static int vnic_dev_init_devcmd2(struct vnic_dev *vdev)
 	err = vnic_dev_alloc_desc_ring(vdev, &vdev->devcmd2->results_ring,
 				       DEVCMD2_RING_SIZE, DEVCMD2_DESC_SIZE);
 	if (err)
-		goto err_free_wq;
+		goto err_disable_wq;
 
 	vdev->devcmd2->result = vdev->devcmd2->results_ring.descs;
 	vdev->devcmd2->cmd_ring = vdev->devcmd2->wq.ring.descs;
@@ -433,8 +433,9 @@ static int vnic_dev_init_devcmd2(struct vnic_dev *vdev)
 
 err_free_desc_ring:
 	vnic_dev_free_desc_ring(vdev, &vdev->devcmd2->results_ring);
-err_free_wq:
+err_disable_wq:
 	vnic_wq_disable(&vdev->devcmd2->wq);
+err_free_wq:
 	vnic_wq_free(&vdev->devcmd2->wq);
 err_free_devcmd2:
 	kfree(vdev->devcmd2);

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index fe166e0..3987699 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c

@@ -1378,13 +1378,20 @@ void hns_nic_net_reset(struct net_device *ndev)
 void hns_nic_net_reinit(struct net_device *netdev)
 {
 	struct hns_nic_priv *priv = netdev_priv(netdev);
+	enum hnae_port_type type = priv->ae_handle->port_type;
 
 	netif_trans_update(priv->netdev);
 	while (test_and_set_bit(NIC_STATE_REINITING, &priv->state))
 		usleep_range(1000, 2000);
 
 	hns_nic_net_down(netdev);
-	hns_nic_net_reset(netdev);
+
+	/* Only do hns_nic_net_reset in debug mode
+	 * because of hardware limitation.
+	 */
+	if (type == HNAE_PORT_DEBUG)
+		hns_nic_net_reset(netdev);
+
 	(void)hns_nic_net_up(netdev);
 	clear_bit(NIC_STATE_REINITING, &priv->state);
 }
@@ -1997,13 +2004,8 @@ static void hns_nic_reset_subtask(struct hns_nic_priv *priv)
 	rtnl_lock();
 	/* put off any impending NetWatchDogTimeout */
 	netif_trans_update(priv->netdev);
+	hns_nic_net_reinit(priv->netdev);
 
-	if (type == HNAE_PORT_DEBUG) {
-		hns_nic_net_reinit(priv->netdev);
-	} else {
-		netif_carrier_off(priv->netdev);
-		netif_tx_disable(priv->netdev);
-	}
 	rtnl_unlock();
 }
 

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index ca36744..9d17e4e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile

@@ -4,14 +4,14 @@
 mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
 		mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
-		fs_counters.o rl.o lag.o dev.o lib/gid.o
+		fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o
 
 mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o
 
 mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
 		fpga/ipsec.o
 
-mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
+mlx5_core-$(CONFIG_MLX5_CORE_EN) += eswitch.o eswitch_offloads.o \
 		en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
 		en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
 		en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o en_selftest.o

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile
new file mode 100644
index 0000000..d8e1711
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/Makefile

@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile
new file mode 100644
index 0000000..d8e1711
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/Makefile

@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 4a78aef..4614ddf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c

@@ -372,7 +372,7 @@ void mlx5e_ipsec_build_inverse_table(void)
 	 */
 	mlx5e_ipsec_inverse_table[1] = htons(0xFFFF);
 	for (mss = 2; mss < MAX_LSO_MSS; mss++) {
-		mss_inv = ((1ULL << 32) / mss) >> 16;
+		mss_inv = div_u64(1ULL << 32, mss) >> 16;
 		mlx5e_ipsec_inverse_table[mss] = htons(mss_inv);
 	}
 }

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 8fa23f6..2eb54d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c

@@ -464,6 +464,8 @@ static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev,
 	if (!perm_addr)
 		return;
 
+	memset(perm_addr, 0xff, MAX_ADDR_LEN);
+
 	mlx5_query_nic_vport_mac_address(priv->mdev, 0, perm_addr);
 }
 

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile
new file mode 100644
index 0000000..d8e1711
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/Makefile

@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index 31e5a26..9034e99 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c

@@ -102,7 +102,7 @@ static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
 	return 0;
 }
 
-int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
+static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
 {
 	int err;
 	struct mlx5_core_dev *mdev = fdev->mdev;

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 42970e2..35d0e33 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c

@@ -275,7 +275,7 @@ int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
 {
 	struct mlx5_fpga_device *fdev = mdev->fpga;
 	unsigned int i;
-	u32 *data;
+	__be32 *data;
 	u32 count;
 	u64 addr;
 	int ret;
@@ -290,7 +290,7 @@ int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
 
 	count = mlx5_fpga_ipsec_counters_count(mdev);
 
-	data = kzalloc(sizeof(u32) * count * 2, GFP_KERNEL);
+	data = kzalloc(sizeof(*data) * count * 2, GFP_KERNEL);
 	if (!data) {
 		ret = -ENOMEM;
 		goto out;

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile
new file mode 100644
index 0000000..d8e1711
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/Makefile

@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile
new file mode 100644
index 0000000..d8e1711
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/Makefile

@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
index de2aed4..573f59f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c

@@ -34,6 +34,7 @@
 #include <linux/etherdevice.h>
 #include <linux/idr.h>
 #include "mlx5_core.h"
+#include "lib/mlx5.h"
 
 void mlx5_init_reserved_gids(struct mlx5_core_dev *dev)
 {

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 192cb93..383fef5a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c

@@ -1790,6 +1790,7 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 
 err_nexthop_neigh_init:
+	mlxsw_sp_nexthop_rif_fini(nh);
 	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
 	return err;
 }
@@ -1866,6 +1867,7 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
 	nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
 	nh_grp->count = fi->fib_nhs;
 	nh_grp->key.fi = fi;
+	fib_info_hold(fi);
 	for (i = 0; i < nh_grp->count; i++) {
 		nh = &nh_grp->nexthops[i];
 		fib_nh = &fi->fib_nh[i];
@@ -1885,6 +1887,7 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
 		nh = &nh_grp->nexthops[i];
 		mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
 	}
+	fib_info_put(nh_grp->key.fi);
 	kfree(nh_grp);
 	return ERR_PTR(err);
 }
@@ -1903,6 +1906,7 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
 	}
 	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
 	WARN_ON_ONCE(nh_grp->adj_index_valid);
+	fib_info_put(nh_grp->key.fi);
 	kfree(nh_grp);
 }
 

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index cd89a3e..656b2d3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c

@@ -979,7 +979,6 @@ mlxsw_sp_bridge_port_vlan_add(struct mlxsw_sp_port *mlxsw_sp_port,
 {
 	u16 pvid = mlxsw_sp_port_pvid_determine(mlxsw_sp_port, vid, is_pvid);
 	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
-	struct mlxsw_sp_bridge_vlan *bridge_vlan;
 	u16 old_pvid = mlxsw_sp_port->pvid;
 	int err;
 
@@ -1000,8 +999,6 @@ mlxsw_sp_bridge_port_vlan_add(struct mlxsw_sp_port *mlxsw_sp_port,
 	if (err)
 		goto err_port_vlan_bridge_join;
 
-	bridge_vlan = mlxsw_sp_bridge_vlan_find(bridge_port, vid);
-
 	return 0;
 
 err_port_vlan_bridge_join:
@@ -1919,6 +1916,8 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
 		memcpy(&switchdev_work->fdb_info, ptr,
 		       sizeof(switchdev_work->fdb_info));
 		switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
+		if (!switchdev_work->fdb_info.addr)
+			goto err_addr_alloc;
 		ether_addr_copy((u8 *)switchdev_work->fdb_info.addr,
 				fdb_info->addr);
 		/* Take a reference on the device. This can be either
@@ -1935,6 +1934,10 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
 	mlxsw_core_schedule_work(&switchdev_work->work);
 
 	return NOTIFY_DONE;
+
+err_addr_alloc:
+	kfree(switchdev_work);
+	return NOTIFY_BAD;
 }
 
 static struct notifier_block mlxsw_sp_switchdev_notifier = {

diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index fec0ff2..3226ddc 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c

@@ -419,7 +419,7 @@ int nfp_flower_metadata_init(struct nfp_app *app)
 	return 0;
 
 err_free_last_used:
-	kfree(priv->stats_ids.free_list.buf);
+	kfree(priv->mask_ids.last_used);
 err_free_mask_id:
 	kfree(priv->mask_ids.mask_id_free_list.buf);
 	return -ENOMEM;

diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index b251eba..9d989c96 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c

@@ -575,7 +575,7 @@ qed_iwarp_print_tcp_ramrod(struct qed_hwfn *p_hwfn,
 
 	if (p_tcp_ramrod->tcp.ip_version == TCP_IPV4) {
 		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
-			   "local_ip=%pI4h:%x, remote_ip=%pI4h%x, vlan=%x\n",
+			   "local_ip=%pI4h:%x, remote_ip=%pI4h:%x, vlan=%x\n",
 			   p_tcp_ramrod->tcp.local_ip,
 			   p_tcp_ramrod->tcp.local_port,
 			   p_tcp_ramrod->tcp.remote_ip,
@@ -583,7 +583,7 @@ qed_iwarp_print_tcp_ramrod(struct qed_hwfn *p_hwfn,
 			   p_tcp_ramrod->tcp.vlan_id);
 	} else {
 		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
-			   "local_ip=%pI6h:%x, remote_ip=%pI6h:%x, vlan=%x\n",
+			   "local_ip=%pI6:%x, remote_ip=%pI6:%x, vlan=%x\n",
 			   p_tcp_ramrod->tcp.local_ip,
 			   p_tcp_ramrod->tcp.local_port,
 			   p_tcp_ramrod->tcp.remote_ip,
@@ -1519,7 +1519,7 @@ qed_iwarp_print_cm_info(struct qed_hwfn *p_hwfn,
 			   cm_info->vlan);
 	else
 		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
-			   "remote_ip %pI6h:%x, local_ip %pI6h:%x vlan=%x\n",
+			   "remote_ip %pI6:%x, local_ip %pI6:%x vlan=%x\n",
 			   cm_info->remote_ip, cm_info->remote_port,
 			   cm_info->local_ip, cm_info->local_port,
 			   cm_info->vlan);

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 761c518..13f72f5 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c

@@ -5034,12 +5034,9 @@ static void efx_ef10_filter_uc_addr_list(struct efx_nic *efx)
 	struct efx_ef10_filter_table *table = efx->filter_state;
 	struct net_device *net_dev = efx->net_dev;
 	struct netdev_hw_addr *uc;
-	int addr_count;
 	unsigned int i;
 
-	addr_count = netdev_uc_count(net_dev);
 	table->uc_promisc = !!(net_dev->flags & IFF_PROMISC);
-	table->dev_uc_count = 1 + addr_count;
 	ether_addr_copy(table->dev_uc_list[0].addr, net_dev->dev_addr);
 	i = 1;
 	netdev_for_each_uc_addr(uc, net_dev) {
@@ -5050,6 +5047,8 @@ static void efx_ef10_filter_uc_addr_list(struct efx_nic *efx)
 		ether_addr_copy(table->dev_uc_list[i].addr, uc->addr);
 		i++;
 	}
+
+	table->dev_uc_count = i;
 }
 
 static void efx_ef10_filter_mc_addr_list(struct efx_nic *efx)
@@ -5057,12 +5056,11 @@ static void efx_ef10_filter_mc_addr_list(struct efx_nic *efx)
 	struct efx_ef10_filter_table *table = efx->filter_state;
 	struct net_device *net_dev = efx->net_dev;
 	struct netdev_hw_addr *mc;
-	unsigned int i, addr_count;
+	unsigned int i;
 
 	table->mc_overflow = false;
 	table->mc_promisc = !!(net_dev->flags & (IFF_PROMISC | IFF_ALLMULTI));
 
-	addr_count = netdev_mc_count(net_dev);
 	i = 0;
 	netdev_for_each_mc_addr(mc, net_dev) {
 		if (i >= EFX_EF10_FILTER_DEV_MC_MAX) {

diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index ea1bbc3..0b6a39b 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c

@@ -2467,6 +2467,10 @@ static int smsc911x_drv_probe(struct platform_device *pdev)
 	pdata = netdev_priv(dev);
 	dev->irq = irq;
 	pdata->ioaddr = ioremap_nocache(res->start, res_size);
+	if (!pdata->ioaddr) {
+		retval = -ENOMEM;
+		goto out_ioremap_fail;
+	}
 
 	pdata->dev = dev;
 	pdata->msg_enable = ((1 << debug) - 1);
@@ -2572,6 +2576,7 @@ static int smsc911x_drv_probe(struct platform_device *pdev)
 	smsc911x_free_resources(pdev);
 out_request_resources_fail:
 	iounmap(pdata->ioaddr);
+out_ioremap_fail:
 	free_netdev(dev);
 out_release_io_1:
 	release_mem_region(res->start, resource_size(res));

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 6c2d1da..fffd6d5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c

@@ -638,7 +638,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
 {
 	struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
 	struct device_node *node = priv->device->of_node;
-	int ret, phy_interface;
+	int ret;
 	u32 reg, val;
 
 	regmap_read(gmac->regmap, SYSCON_EMAC_REG, &val);
@@ -718,11 +718,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
 	if (gmac->variant->support_rmii)
 		reg &= ~SYSCON_RMII_EN;
 
-	phy_interface = priv->plat->interface;
-	/* if PHY is internal, select the mode (xMII) used by the SoC */
-	if (gmac->use_internal_phy)
-		phy_interface = gmac->variant->internal_phy;
-	switch (phy_interface) {
+	switch (priv->plat->interface) {
 	case PHY_INTERFACE_MODE_MII:
 		/* default */
 		break;
@@ -936,7 +932,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 	}
 
 	plat_dat->interface = of_get_phy_mode(dev->of_node);
-	if (plat_dat->interface == PHY_INTERFACE_MODE_INTERNAL) {
+	if (plat_dat->interface == gmac->variant->internal_phy) {
 		dev_info(&pdev->dev, "Will use internal PHY\n");
 		gmac->use_internal_phy = true;
 		gmac->ephy_clk = of_clk_get(plat_dat->phy_node, 0);

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 19bba62..1853f7f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c

@@ -1449,7 +1449,7 @@ static void free_dma_rx_desc_resources(struct stmmac_priv *priv)
 static void free_dma_tx_desc_resources(struct stmmac_priv *priv)
 {
 	u32 tx_count = priv->plat->tx_queues_to_use;
-	u32 queue = 0;
+	u32 queue;
 
 	/* Free TX queue resources */
 	for (queue = 0; queue < tx_count; queue++) {
@@ -1498,7 +1498,7 @@ static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv)
 						    sizeof(dma_addr_t),
 						    GFP_KERNEL);
 		if (!rx_q->rx_skbuff_dma)
-			return -ENOMEM;
+			goto err_dma;
 
 		rx_q->rx_skbuff = kmalloc_array(DMA_RX_SIZE,
 						sizeof(struct sk_buff *),
@@ -1561,13 +1561,13 @@ static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv)
 						    sizeof(*tx_q->tx_skbuff_dma),
 						    GFP_KERNEL);
 		if (!tx_q->tx_skbuff_dma)
-			return -ENOMEM;
+			goto err_dma;
 
 		tx_q->tx_skbuff = kmalloc_array(DMA_TX_SIZE,
 						sizeof(struct sk_buff *),
 						GFP_KERNEL);
 		if (!tx_q->tx_skbuff)
-			goto err_dma_buffers;
+			goto err_dma;
 
 		if (priv->extend_desc) {
 			tx_q->dma_etx = dma_zalloc_coherent(priv->device,
@@ -1577,7 +1577,7 @@ static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv)
 							    &tx_q->dma_tx_phy,
 							    GFP_KERNEL);
 			if (!tx_q->dma_etx)
-				goto err_dma_buffers;
+				goto err_dma;
 		} else {
 			tx_q->dma_tx = dma_zalloc_coherent(priv->device,
 							   DMA_TX_SIZE *
@@ -1586,13 +1586,13 @@ static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv)
 							   &tx_q->dma_tx_phy,
 							   GFP_KERNEL);
 			if (!tx_q->dma_tx)
-				goto err_dma_buffers;
+				goto err_dma;
 		}
 	}
 
 	return 0;
 
-err_dma_buffers:
+err_dma:
 	free_dma_tx_desc_resources(priv);
 
 	return ret;

diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index 4daf3d0..0250aa9 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c

@@ -418,6 +418,8 @@ static int ntb_netdev_probe(struct device *client_dev)
 	if (!ndev)
 		return -ENOMEM;
 
+	SET_NETDEV_DEV(ndev, client_dev);
+
 	dev = netdev_priv(ndev);
 	dev->ndev = ndev;
 	dev->pdev = pdev;

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 9af3239..3570c75 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c

@@ -106,7 +106,7 @@ struct major_info {
 	struct rcu_head rcu;
 	dev_t major;
 	struct idr minor_idr;
-	struct mutex minor_lock;
+	spinlock_t minor_lock;
 	const char *device_name;
 	struct list_head next;
 };
@@ -416,15 +416,15 @@ int tap_get_minor(dev_t major, struct tap_dev *tap)
 		goto unlock;
 	}
 
-	mutex_lock(&tap_major->minor_lock);
-	retval = idr_alloc(&tap_major->minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL);
+	spin_lock(&tap_major->minor_lock);
+	retval = idr_alloc(&tap_major->minor_idr, tap, 1, TAP_NUM_DEVS, GFP_ATOMIC);
 	if (retval >= 0) {
 		tap->minor = retval;
 	} else if (retval == -ENOSPC) {
 		netdev_err(tap->dev, "Too many tap devices\n");
 		retval = -EINVAL;
 	}
-	mutex_unlock(&tap_major->minor_lock);
+	spin_unlock(&tap_major->minor_lock);
 
 unlock:
 	rcu_read_unlock();
@@ -442,12 +442,12 @@ void tap_free_minor(dev_t major, struct tap_dev *tap)
 		goto unlock;
 	}
 
-	mutex_lock(&tap_major->minor_lock);
+	spin_lock(&tap_major->minor_lock);
 	if (tap->minor) {
 		idr_remove(&tap_major->minor_idr, tap->minor);
 		tap->minor = 0;
 	}
-	mutex_unlock(&tap_major->minor_lock);
+	spin_unlock(&tap_major->minor_lock);
 
 unlock:
 	rcu_read_unlock();
@@ -467,13 +467,13 @@ static struct tap_dev *dev_get_by_tap_file(int major, int minor)
 		goto unlock;
 	}
 
-	mutex_lock(&tap_major->minor_lock);
+	spin_lock(&tap_major->minor_lock);
 	tap = idr_find(&tap_major->minor_idr, minor);
 	if (tap) {
 		dev = tap->dev;
 		dev_hold(dev);
 	}
-	mutex_unlock(&tap_major->minor_lock);
+	spin_unlock(&tap_major->minor_lock);
 
 unlock:
 	rcu_read_unlock();
@@ -1244,7 +1244,7 @@ static int tap_list_add(dev_t major, const char *device_name)
 	tap_major->major = MAJOR(major);
 
 	idr_init(&tap_major->minor_idr);
-	mutex_init(&tap_major->minor_lock);
+	spin_lock_init(&tap_major->minor_lock);
 
 	tap_major->device_name = device_name;
 

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index dcde596..7e689c8 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c

@@ -4934,6 +4934,11 @@ brcmf_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 		cfg80211_mgmt_tx_status(wdev, *cookie, buf, len, true,
 					GFP_KERNEL);
 	} else if (ieee80211_is_action(mgmt->frame_control)) {
+		if (len > BRCMF_FIL_ACTION_FRAME_SIZE + DOT11_MGMT_HDR_LEN) {
+			brcmf_err("invalid action frame length\n");
+			err = -EINVAL;
+			goto exit;
+		}
 		af_params = kzalloc(sizeof(*af_params), GFP_KERNEL);
 		if (af_params == NULL) {
 			brcmf_err("unable to allocate frame\n");

diff --git a/drivers/ntb/hw/Kconfig b/drivers/ntb/hw/Kconfig
index 7116472..a89243c 100644
--- a/drivers/ntb/hw/Kconfig
+++ b/drivers/ntb/hw/Kconfig

@@ -1,2 +1,3 @@
 source "drivers/ntb/hw/amd/Kconfig"
+source "drivers/ntb/hw/idt/Kconfig"
 source "drivers/ntb/hw/intel/Kconfig"

diff --git a/drivers/ntb/hw/Makefile b/drivers/ntb/hw/Makefile
index 532e085..87332c3 100644
--- a/drivers/ntb/hw/Makefile
+++ b/drivers/ntb/hw/Makefile

@@ -1,2 +1,3 @@
 obj-$(CONFIG_NTB_AMD)	+= amd/
+obj-$(CONFIG_NTB_IDT)	+= idt/
 obj-$(CONFIG_NTB_INTEL)	+= intel/

diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
index 019a158..f0788aa 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c

@@ -5,6 +5,7 @@
  *   GPL LICENSE SUMMARY
  *
  *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of version 2 of the GNU General Public License as
@@ -13,6 +14,7 @@
  *   BSD LICENSE
  *
  *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -79,40 +81,42 @@ static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx)
 	return 1 << idx;
 }
 
-static int amd_ntb_mw_count(struct ntb_dev *ntb)
+static int amd_ntb_mw_count(struct ntb_dev *ntb, int pidx)
 {
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	return ntb_ndev(ntb)->mw_count;
 }
 
-static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
-				phys_addr_t *base,
-				resource_size_t *size,
-				resource_size_t *align,
-				resource_size_t *align_size)
+static int amd_ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int idx,
+				resource_size_t *addr_align,
+				resource_size_t *size_align,
+				resource_size_t *size_max)
 {
 	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
 	int bar;
 
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	bar = ndev_mw_to_bar(ndev, idx);
 	if (bar < 0)
 		return bar;
 
-	if (base)
-		*base = pci_resource_start(ndev->ntb.pdev, bar);
+	if (addr_align)
+		*addr_align = SZ_4K;
 
-	if (size)
-		*size = pci_resource_len(ndev->ntb.pdev, bar);
+	if (size_align)
+		*size_align = 1;
 
-	if (align)
-		*align = SZ_4K;
-
-	if (align_size)
-		*align_size = 1;
+	if (size_max)
+		*size_max = pci_resource_len(ndev->ntb.pdev, bar);
 
 	return 0;
 }
 
-static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
+static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 				dma_addr_t addr, resource_size_t size)
 {
 	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
@@ -122,11 +126,14 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
 	u64 base_addr, limit, reg_val;
 	int bar;
 
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	bar = ndev_mw_to_bar(ndev, idx);
 	if (bar < 0)
 		return bar;
 
-	mw_size = pci_resource_len(ndev->ntb.pdev, bar);
+	mw_size = pci_resource_len(ntb->pdev, bar);
 
 	/* make sure the range fits in the usable mw size */
 	if (size > mw_size)
@@ -135,7 +142,7 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
 	mmio = ndev->self_mmio;
 	peer_mmio = ndev->peer_mmio;
 
-	base_addr = pci_resource_start(ndev->ntb.pdev, bar);
+	base_addr = pci_resource_start(ntb->pdev, bar);
 
 	if (bar != 1) {
 		xlat_reg = AMD_BAR23XLAT_OFFSET + ((bar - 2) << 2);
@@ -212,7 +219,7 @@ static int amd_link_is_up(struct amd_ntb_dev *ndev)
 	return 0;
 }
 
-static int amd_ntb_link_is_up(struct ntb_dev *ntb,
+static u64 amd_ntb_link_is_up(struct ntb_dev *ntb,
 			      enum ntb_speed *speed,
 			      enum ntb_width *width)
 {
@@ -225,7 +232,7 @@ static int amd_ntb_link_is_up(struct ntb_dev *ntb,
 		if (width)
 			*width = NTB_LNK_STA_WIDTH(ndev->lnk_sta);
 
-		dev_dbg(ndev_dev(ndev), "link is up.\n");
+		dev_dbg(&ntb->pdev->dev, "link is up.\n");
 
 		ret = 1;
 	} else {
@@ -234,7 +241,7 @@ static int amd_ntb_link_is_up(struct ntb_dev *ntb,
 		if (width)
 			*width = NTB_WIDTH_NONE;
 
-		dev_dbg(ndev_dev(ndev), "link is down.\n");
+		dev_dbg(&ntb->pdev->dev, "link is down.\n");
 	}
 
 	return ret;
@@ -254,7 +261,7 @@ static int amd_ntb_link_enable(struct ntb_dev *ntb,
 
 	if (ndev->ntb.topo == NTB_TOPO_SEC)
 		return -EINVAL;
-	dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+	dev_dbg(&ntb->pdev->dev, "Enabling Link.\n");
 
 	ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
 	ntb_ctl |= (PMM_REG_CTL | SMM_REG_CTL);
@@ -275,7 +282,7 @@ static int amd_ntb_link_disable(struct ntb_dev *ntb)
 
 	if (ndev->ntb.topo == NTB_TOPO_SEC)
 		return -EINVAL;
-	dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+	dev_dbg(&ntb->pdev->dev, "Enabling Link.\n");
 
 	ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
 	ntb_ctl &= ~(PMM_REG_CTL | SMM_REG_CTL);
@@ -284,6 +291,31 @@ static int amd_ntb_link_disable(struct ntb_dev *ntb)
 	return 0;
 }
 
+static int amd_ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+	/* The same as for inbound MWs */
+	return ntb_ndev(ntb)->mw_count;
+}
+
+static int amd_ntb_peer_mw_get_addr(struct ntb_dev *ntb, int idx,
+				    phys_addr_t *base, resource_size_t *size)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	int bar;
+
+	bar = ndev_mw_to_bar(ndev, idx);
+	if (bar < 0)
+		return bar;
+
+	if (base)
+		*base = pci_resource_start(ndev->ntb.pdev, bar);
+
+	if (size)
+		*size = pci_resource_len(ndev->ntb.pdev, bar);
+
+	return 0;
+}
+
 static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb)
 {
 	return ntb_ndev(ntb)->db_valid_mask;
@@ -400,30 +432,30 @@ static int amd_ntb_spad_write(struct ntb_dev *ntb,
 	return 0;
 }
 
-static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int pidx, int sidx)
 {
 	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
 	void __iomem *mmio = ndev->self_mmio;
 	u32 offset;
 
-	if (idx < 0 || idx >= ndev->spad_count)
+	if (sidx < 0 || sidx >= ndev->spad_count)
 		return -EINVAL;
 
-	offset = ndev->peer_spad + (idx << 2);
+	offset = ndev->peer_spad + (sidx << 2);
 	return readl(mmio + AMD_SPAD_OFFSET + offset);
 }
 
-static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
-				   int idx, u32 val)
+static int amd_ntb_peer_spad_write(struct ntb_dev *ntb, int pidx,
+				   int sidx, u32 val)
 {
 	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
 	void __iomem *mmio = ndev->self_mmio;
 	u32 offset;
 
-	if (idx < 0 || idx >= ndev->spad_count)
+	if (sidx < 0 || sidx >= ndev->spad_count)
 		return -EINVAL;
 
-	offset = ndev->peer_spad + (idx << 2);
+	offset = ndev->peer_spad + (sidx << 2);
 	writel(val, mmio + AMD_SPAD_OFFSET + offset);
 
 	return 0;
@@ -431,8 +463,10 @@ static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
 
 static const struct ntb_dev_ops amd_ntb_ops = {
 	.mw_count		= amd_ntb_mw_count,
-	.mw_get_range		= amd_ntb_mw_get_range,
+	.mw_get_align		= amd_ntb_mw_get_align,
 	.mw_set_trans		= amd_ntb_mw_set_trans,
+	.peer_mw_count		= amd_ntb_peer_mw_count,
+	.peer_mw_get_addr	= amd_ntb_peer_mw_get_addr,
 	.link_is_up		= amd_ntb_link_is_up,
 	.link_enable		= amd_ntb_link_enable,
 	.link_disable		= amd_ntb_link_disable,
@@ -466,18 +500,19 @@ static void amd_ack_smu(struct amd_ntb_dev *ndev, u32 bit)
 static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
 {
 	void __iomem *mmio = ndev->self_mmio;
+	struct device *dev = &ndev->ntb.pdev->dev;
 	u32 status;
 
 	status = readl(mmio + AMD_INTSTAT_OFFSET);
 	if (!(status & AMD_EVENT_INTMASK))
 		return;
 
-	dev_dbg(ndev_dev(ndev), "status = 0x%x and vec = %d\n", status, vec);
+	dev_dbg(dev, "status = 0x%x and vec = %d\n", status, vec);
 
 	status &= AMD_EVENT_INTMASK;
 	switch (status) {
 	case AMD_PEER_FLUSH_EVENT:
-		dev_info(ndev_dev(ndev), "Flush is done.\n");
+		dev_info(dev, "Flush is done.\n");
 		break;
 	case AMD_PEER_RESET_EVENT:
 		amd_ack_smu(ndev, AMD_PEER_RESET_EVENT);
@@ -503,7 +538,7 @@ static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
 		status = readl(mmio + AMD_PMESTAT_OFFSET);
 		/* check if this is WAKEUP event */
 		if (status & 0x1)
-			dev_info(ndev_dev(ndev), "Wakeup is done.\n");
+			dev_info(dev, "Wakeup is done.\n");
 
 		amd_ack_smu(ndev, AMD_PEER_D0_EVENT);
 
@@ -512,14 +547,14 @@ static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
 				      AMD_LINK_HB_TIMEOUT);
 		break;
 	default:
-		dev_info(ndev_dev(ndev), "event status = 0x%x.\n", status);
+		dev_info(dev, "event status = 0x%x.\n", status);
 		break;
 	}
 }
 
 static irqreturn_t ndev_interrupt(struct amd_ntb_dev *ndev, int vec)
 {
-	dev_dbg(ndev_dev(ndev), "vec %d\n", vec);
+	dev_dbg(&ndev->ntb.pdev->dev, "vec %d\n", vec);
 
 	if (vec > (AMD_DB_CNT - 1) || (ndev->msix_vec_count == 1))
 		amd_handle_event(ndev, vec);
@@ -541,7 +576,7 @@ static irqreturn_t ndev_irq_isr(int irq, void *dev)
 {
 	struct amd_ntb_dev *ndev = dev;
 
-	return ndev_interrupt(ndev, irq - ndev_pdev(ndev)->irq);
+	return ndev_interrupt(ndev, irq - ndev->ntb.pdev->irq);
 }
 
 static int ndev_init_isr(struct amd_ntb_dev *ndev,
@@ -550,7 +585,7 @@ static int ndev_init_isr(struct amd_ntb_dev *ndev,
 	struct pci_dev *pdev;
 	int rc, i, msix_count, node;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 
 	node = dev_to_node(&pdev->dev);
 
@@ -592,7 +627,7 @@ static int ndev_init_isr(struct amd_ntb_dev *ndev,
 			goto err_msix_request;
 	}
 
-	dev_dbg(ndev_dev(ndev), "Using msix interrupts\n");
+	dev_dbg(&pdev->dev, "Using msix interrupts\n");
 	ndev->db_count = msix_min;
 	ndev->msix_vec_count = msix_max;
 	return 0;
@@ -619,7 +654,7 @@ static int ndev_init_isr(struct amd_ntb_dev *ndev,
 	if (rc)
 		goto err_msi_request;
 
-	dev_dbg(ndev_dev(ndev), "Using msi interrupts\n");
+	dev_dbg(&pdev->dev, "Using msi interrupts\n");
 	ndev->db_count = 1;
 	ndev->msix_vec_count = 1;
 	return 0;
@@ -636,7 +671,7 @@ static int ndev_init_isr(struct amd_ntb_dev *ndev,
 	if (rc)
 		goto err_intx_request;
 
-	dev_dbg(ndev_dev(ndev), "Using intx interrupts\n");
+	dev_dbg(&pdev->dev, "Using intx interrupts\n");
 	ndev->db_count = 1;
 	ndev->msix_vec_count = 1;
 	return 0;
@@ -651,7 +686,7 @@ static void ndev_deinit_isr(struct amd_ntb_dev *ndev)
 	void __iomem *mmio = ndev->self_mmio;
 	int i;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 
 	/* Mask all doorbell interrupts */
 	ndev->db_mask = ndev->db_valid_mask;
@@ -777,7 +812,8 @@ static void ndev_init_debugfs(struct amd_ntb_dev *ndev)
 		ndev->debugfs_info = NULL;
 	} else {
 		ndev->debugfs_dir =
-			debugfs_create_dir(ndev_name(ndev), debugfs_dir);
+			debugfs_create_dir(pci_name(ndev->ntb.pdev),
+					   debugfs_dir);
 		if (!ndev->debugfs_dir)
 			ndev->debugfs_info = NULL;
 		else
@@ -812,7 +848,7 @@ static int amd_poll_link(struct amd_ntb_dev *ndev)
 	reg = readl(mmio + AMD_SIDEINFO_OFFSET);
 	reg &= NTB_LIN_STA_ACTIVE_BIT;
 
-	dev_dbg(ndev_dev(ndev), "%s: reg_val = 0x%x.\n", __func__, reg);
+	dev_dbg(&ndev->ntb.pdev->dev, "%s: reg_val = 0x%x.\n", __func__, reg);
 
 	if (reg == ndev->cntl_sta)
 		return 0;
@@ -894,7 +930,8 @@ static int amd_init_ntb(struct amd_ntb_dev *ndev)
 
 		break;
 	default:
-		dev_err(ndev_dev(ndev), "AMD NTB does not support B2B mode.\n");
+		dev_err(&ndev->ntb.pdev->dev,
+			"AMD NTB does not support B2B mode.\n");
 		return -EINVAL;
 	}
 
@@ -923,10 +960,10 @@ static int amd_init_dev(struct amd_ntb_dev *ndev)
 	struct pci_dev *pdev;
 	int rc = 0;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 
 	ndev->ntb.topo = amd_get_topo(ndev);
-	dev_dbg(ndev_dev(ndev), "AMD NTB topo is %s\n",
+	dev_dbg(&pdev->dev, "AMD NTB topo is %s\n",
 		ntb_topo_string(ndev->ntb.topo));
 
 	rc = amd_init_ntb(ndev);
@@ -935,7 +972,7 @@ static int amd_init_dev(struct amd_ntb_dev *ndev)
 
 	rc = amd_init_isr(ndev);
 	if (rc) {
-		dev_err(ndev_dev(ndev), "fail to init isr.\n");
+		dev_err(&pdev->dev, "fail to init isr.\n");
 		return rc;
 	}
 
@@ -973,7 +1010,7 @@ static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
 		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (rc)
 			goto err_dma_mask;
-		dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n");
+		dev_warn(&pdev->dev, "Cannot DMA highmem\n");
 	}
 
 	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
@@ -981,7 +1018,7 @@ static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
 		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (rc)
 			goto err_dma_mask;
-		dev_warn(ndev_dev(ndev), "Cannot DMA consistent highmem\n");
+		dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n");
 	}
 
 	ndev->self_mmio = pci_iomap(pdev, 0, 0);
@@ -1004,7 +1041,7 @@ static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
 
 static void amd_ntb_deinit_pci(struct amd_ntb_dev *ndev)
 {
-	struct pci_dev *pdev = ndev_pdev(ndev);
+	struct pci_dev *pdev = ndev->ntb.pdev;
 
 	pci_iounmap(pdev, ndev->self_mmio);
 

diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h
index 13d73ed..8f3617a 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.h
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.h

@@ -211,9 +211,6 @@ struct amd_ntb_dev {
 	struct dentry *debugfs_info;
 };
 
-#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
-#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
-#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
 #define ntb_ndev(__ntb) container_of(__ntb, struct amd_ntb_dev, ntb)
 #define hb_ndev(__work) container_of(__work, struct amd_ntb_dev, hb_timer.work)
 

diff --git a/drivers/ntb/hw/idt/Kconfig b/drivers/ntb/hw/idt/Kconfig
new file mode 100644
index 0000000..b360e56
--- /dev/null
+++ b/drivers/ntb/hw/idt/Kconfig

@@ -0,0 +1,31 @@
+config NTB_IDT
+	tristate "IDT PCIe-switch Non-Transparent Bridge support"
+	depends on PCI
+	help
+	 This driver supports NTB of capable IDT PCIe-switches.
+
+	 Some of the pre-initializations must be made before IDT PCIe-switch
+	 exposes its NT-functions correctly. It should be done by either proper
+	 initialisation of EEPROM connected to master smbus of the switch or
+	 by BIOS using slave-SMBus interface changing corresponding registers
+	 value. Evidently it must be done before PCI bus enumeration is
+	 finished in Linux kernel.
+
+	 First of all partitions must be activated and properly assigned to all
+	 the ports with NT-functions intended to be activated (see SWPARTxCTL
+	 and SWPORTxCTL registers). Then all NT-function BARs must be enabled
+	 with chosen valid aperture. For memory windows related BARs the
+	 aperture settings shall determine the maximum size of memory windows
+	 accepted by a BAR. Note that BAR0 must map PCI configuration space
+	 registers.
+
+	 It's worth noting that since a part of this driver relies on the
+	 BAR settings of peer NT-functions, the BAR setups can't be done over
+	 kernel PCI fixups. That's why the alternative pre-initialization
+	 techniques like BIOS using SMBus interface or EEPROM should be
+	 utilized. Additionally if one needs to have temperature sensor
+	 information printed to system log, the corresponding registers must
+	 be initialized within BIOS/EEPROM as well.
+
+	 If unsure, say N.
+

diff --git a/drivers/ntb/hw/idt/Makefile b/drivers/ntb/hw/idt/Makefile
new file mode 100644
index 0000000..a102cf1
--- /dev/null
+++ b/drivers/ntb/hw/idt/Makefile

@@ -0,0 +1 @@
+obj-$(CONFIG_NTB_IDT) += ntb_hw_idt.o

diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c
new file mode 100644
index 0000000..d44d7ef
--- /dev/null
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.c

@@ -0,0 +1,2712 @@
+/*
+ *   This file is provided under a GPLv2 license.  When using or
+ *   redistributing this file, you may do so under that license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 T-Platforms All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms and conditions of the GNU General Public License,
+ *   version 2, as published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+ *   Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License along
+ *   with this program; if not, one can be found http://www.gnu.org/licenses/.
+ *
+ *   The full GNU General Public License is included in this distribution in
+ *   the file called "COPYING".
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * IDT PCIe-switch NTB Linux driver
+ *
+ * Contact Information:
+ * Serge Semin <fancer.lancer@gmail.com>, <Sergey.Semin@t-platforms.ru>
+ */
+
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/sizes.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/debugfs.h>
+#include <linux/ntb.h>
+
+#include "ntb_hw_idt.h"
+
+#define NTB_NAME	"ntb_hw_idt"
+#define NTB_DESC	"IDT PCI-E Non-Transparent Bridge Driver"
+#define NTB_VER		"2.0"
+#define NTB_IRQNAME	"ntb_irq_idt"
+
+MODULE_DESCRIPTION(NTB_DESC);
+MODULE_VERSION(NTB_VER);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("T-platforms");
+
+/*
+ * NT Endpoint registers table simplifying a loop access to the functionally
+ * related registers
+ *
+ * The initializer is positional and consists of two groups:
+ * - six BAR register sets (BAR0 - BAR5), each one listing the
+ *   BARSETUPx, BARLIMITx, BARLTBASEx and BARUTBASEx registers in that order;
+ * - four message register sets (0 - 3), each one listing the
+ *   INMSGx, OUTMSGx and INMSGSRCx registers in that order.
+ * All the entries are IDT_NT_* register identifiers.
+ */
+static const struct idt_ntb_regs ntdata_tbl = {
+	{ {IDT_NT_BARSETUP0,	IDT_NT_BARLIMIT0,
+	   IDT_NT_BARLTBASE0,	IDT_NT_BARUTBASE0},
+	  {IDT_NT_BARSETUP1,	IDT_NT_BARLIMIT1,
+	   IDT_NT_BARLTBASE1,	IDT_NT_BARUTBASE1},
+	  {IDT_NT_BARSETUP2,	IDT_NT_BARLIMIT2,
+	   IDT_NT_BARLTBASE2,	IDT_NT_BARUTBASE2},
+	  {IDT_NT_BARSETUP3,	IDT_NT_BARLIMIT3,
+	   IDT_NT_BARLTBASE3,	IDT_NT_BARUTBASE3},
+	  {IDT_NT_BARSETUP4,	IDT_NT_BARLIMIT4,
+	   IDT_NT_BARLTBASE4,	IDT_NT_BARUTBASE4},
+	  {IDT_NT_BARSETUP5,	IDT_NT_BARLIMIT5,
+	   IDT_NT_BARLTBASE5,	IDT_NT_BARUTBASE5} },
+	{ {IDT_NT_INMSG0,	IDT_NT_OUTMSG0,	IDT_NT_INMSGSRC0},
+	  {IDT_NT_INMSG1,	IDT_NT_OUTMSG1,	IDT_NT_INMSGSRC1},
+	  {IDT_NT_INMSG2,	IDT_NT_OUTMSG2,	IDT_NT_INMSGSRC2},
+	  {IDT_NT_INMSG3,	IDT_NT_OUTMSG3,	IDT_NT_INMSGSRC3} }
+};
+
+/*
+ * NT Endpoint ports data table with the corresponding pcie command, link
+ * status, control and BAR-related registers
+ *
+ * The table is indexed by the port number (see the leading index comment of
+ * each entry). Only the entries of ports 0, 2, 4, 6, 8, 12, 16 and 20 are
+ * populated, the rest of them are zero-filled.
+ * Each populated entry is a positional initializer listing the following
+ * IDT_SW_* register identifiers of the port:
+ * - PCIECMDSTS, PCIELCTLSTS and NTCTL registers of the NT-function;
+ * - SWPORTxCTL and SWPORTxSTS registers of the switch port;
+ * - six BAR register sets (BAR0 - BAR5), each one listing the
+ *   BARSETUPx, BARLIMITx, BARLTBASEx and BARUTBASEx registers in that order.
+ */
+static const struct idt_ntb_port portdata_tbl[IDT_MAX_NR_PORTS] = {
+/*0*/	{ IDT_SW_NTP0_PCIECMDSTS,	IDT_SW_NTP0_PCIELCTLSTS,
+	  IDT_SW_NTP0_NTCTL,
+	  IDT_SW_SWPORT0CTL,		IDT_SW_SWPORT0STS,
+	  { {IDT_SW_NTP0_BARSETUP0,	IDT_SW_NTP0_BARLIMIT0,
+	     IDT_SW_NTP0_BARLTBASE0,	IDT_SW_NTP0_BARUTBASE0},
+	    {IDT_SW_NTP0_BARSETUP1,	IDT_SW_NTP0_BARLIMIT1,
+	     IDT_SW_NTP0_BARLTBASE1,	IDT_SW_NTP0_BARUTBASE1},
+	    {IDT_SW_NTP0_BARSETUP2,	IDT_SW_NTP0_BARLIMIT2,
+	     IDT_SW_NTP0_BARLTBASE2,	IDT_SW_NTP0_BARUTBASE2},
+	    {IDT_SW_NTP0_BARSETUP3,	IDT_SW_NTP0_BARLIMIT3,
+	     IDT_SW_NTP0_BARLTBASE3,	IDT_SW_NTP0_BARUTBASE3},
+	    {IDT_SW_NTP0_BARSETUP4,	IDT_SW_NTP0_BARLIMIT4,
+	     IDT_SW_NTP0_BARLTBASE4,	IDT_SW_NTP0_BARUTBASE4},
+	    {IDT_SW_NTP0_BARSETUP5,	IDT_SW_NTP0_BARLIMIT5,
+	     IDT_SW_NTP0_BARLTBASE5,	IDT_SW_NTP0_BARUTBASE5} } },
+/*1*/	{0},
+/*2*/	{ IDT_SW_NTP2_PCIECMDSTS,	IDT_SW_NTP2_PCIELCTLSTS,
+	  IDT_SW_NTP2_NTCTL,
+	  IDT_SW_SWPORT2CTL,		IDT_SW_SWPORT2STS,
+	  { {IDT_SW_NTP2_BARSETUP0,	IDT_SW_NTP2_BARLIMIT0,
+	     IDT_SW_NTP2_BARLTBASE0,	IDT_SW_NTP2_BARUTBASE0},
+	    {IDT_SW_NTP2_BARSETUP1,	IDT_SW_NTP2_BARLIMIT1,
+	     IDT_SW_NTP2_BARLTBASE1,	IDT_SW_NTP2_BARUTBASE1},
+	    {IDT_SW_NTP2_BARSETUP2,	IDT_SW_NTP2_BARLIMIT2,
+	     IDT_SW_NTP2_BARLTBASE2,	IDT_SW_NTP2_BARUTBASE2},
+	    {IDT_SW_NTP2_BARSETUP3,	IDT_SW_NTP2_BARLIMIT3,
+	     IDT_SW_NTP2_BARLTBASE3,	IDT_SW_NTP2_BARUTBASE3},
+	    {IDT_SW_NTP2_BARSETUP4,	IDT_SW_NTP2_BARLIMIT4,
+	     IDT_SW_NTP2_BARLTBASE4,	IDT_SW_NTP2_BARUTBASE4},
+	    {IDT_SW_NTP2_BARSETUP5,	IDT_SW_NTP2_BARLIMIT5,
+	     IDT_SW_NTP2_BARLTBASE5,	IDT_SW_NTP2_BARUTBASE5} } },
+/*3*/	{0},
+/*4*/	{ IDT_SW_NTP4_PCIECMDSTS,	IDT_SW_NTP4_PCIELCTLSTS,
+	  IDT_SW_NTP4_NTCTL,
+	  IDT_SW_SWPORT4CTL,		IDT_SW_SWPORT4STS,
+	  { {IDT_SW_NTP4_BARSETUP0,	IDT_SW_NTP4_BARLIMIT0,
+	     IDT_SW_NTP4_BARLTBASE0,	IDT_SW_NTP4_BARUTBASE0},
+	    {IDT_SW_NTP4_BARSETUP1,	IDT_SW_NTP4_BARLIMIT1,
+	     IDT_SW_NTP4_BARLTBASE1,	IDT_SW_NTP4_BARUTBASE1},
+	    {IDT_SW_NTP4_BARSETUP2,	IDT_SW_NTP4_BARLIMIT2,
+	     IDT_SW_NTP4_BARLTBASE2,	IDT_SW_NTP4_BARUTBASE2},
+	    {IDT_SW_NTP4_BARSETUP3,	IDT_SW_NTP4_BARLIMIT3,
+	     IDT_SW_NTP4_BARLTBASE3,	IDT_SW_NTP4_BARUTBASE3},
+	    {IDT_SW_NTP4_BARSETUP4,	IDT_SW_NTP4_BARLIMIT4,
+	     IDT_SW_NTP4_BARLTBASE4,	IDT_SW_NTP4_BARUTBASE4},
+	    {IDT_SW_NTP4_BARSETUP5,	IDT_SW_NTP4_BARLIMIT5,
+	     IDT_SW_NTP4_BARLTBASE5,	IDT_SW_NTP4_BARUTBASE5} } },
+/*5*/	{0},
+/*6*/	{ IDT_SW_NTP6_PCIECMDSTS,	IDT_SW_NTP6_PCIELCTLSTS,
+	  IDT_SW_NTP6_NTCTL,
+	  IDT_SW_SWPORT6CTL,		IDT_SW_SWPORT6STS,
+	  { {IDT_SW_NTP6_BARSETUP0,	IDT_SW_NTP6_BARLIMIT0,
+	     IDT_SW_NTP6_BARLTBASE0,	IDT_SW_NTP6_BARUTBASE0},
+	    {IDT_SW_NTP6_BARSETUP1,	IDT_SW_NTP6_BARLIMIT1,
+	     IDT_SW_NTP6_BARLTBASE1,	IDT_SW_NTP6_BARUTBASE1},
+	    {IDT_SW_NTP6_BARSETUP2,	IDT_SW_NTP6_BARLIMIT2,
+	     IDT_SW_NTP6_BARLTBASE2,	IDT_SW_NTP6_BARUTBASE2},
+	    {IDT_SW_NTP6_BARSETUP3,	IDT_SW_NTP6_BARLIMIT3,
+	     IDT_SW_NTP6_BARLTBASE3,	IDT_SW_NTP6_BARUTBASE3},
+	    {IDT_SW_NTP6_BARSETUP4,	IDT_SW_NTP6_BARLIMIT4,
+	     IDT_SW_NTP6_BARLTBASE4,	IDT_SW_NTP6_BARUTBASE4},
+	    {IDT_SW_NTP6_BARSETUP5,	IDT_SW_NTP6_BARLIMIT5,
+	     IDT_SW_NTP6_BARLTBASE5,	IDT_SW_NTP6_BARUTBASE5} } },
+/*7*/	{0},
+/*8*/	{ IDT_SW_NTP8_PCIECMDSTS,	IDT_SW_NTP8_PCIELCTLSTS,
+	  IDT_SW_NTP8_NTCTL,
+	  IDT_SW_SWPORT8CTL,		IDT_SW_SWPORT8STS,
+	  { {IDT_SW_NTP8_BARSETUP0,	IDT_SW_NTP8_BARLIMIT0,
+	     IDT_SW_NTP8_BARLTBASE0,	IDT_SW_NTP8_BARUTBASE0},
+	    {IDT_SW_NTP8_BARSETUP1,	IDT_SW_NTP8_BARLIMIT1,
+	     IDT_SW_NTP8_BARLTBASE1,	IDT_SW_NTP8_BARUTBASE1},
+	    {IDT_SW_NTP8_BARSETUP2,	IDT_SW_NTP8_BARLIMIT2,
+	     IDT_SW_NTP8_BARLTBASE2,	IDT_SW_NTP8_BARUTBASE2},
+	    {IDT_SW_NTP8_BARSETUP3,	IDT_SW_NTP8_BARLIMIT3,
+	     IDT_SW_NTP8_BARLTBASE3,	IDT_SW_NTP8_BARUTBASE3},
+	    {IDT_SW_NTP8_BARSETUP4,	IDT_SW_NTP8_BARLIMIT4,
+	     IDT_SW_NTP8_BARLTBASE4,	IDT_SW_NTP8_BARUTBASE4},
+	    {IDT_SW_NTP8_BARSETUP5,	IDT_SW_NTP8_BARLIMIT5,
+	     IDT_SW_NTP8_BARLTBASE5,	IDT_SW_NTP8_BARUTBASE5} } },
+/*9*/	{0},
+/*10*/	{0},
+/*11*/	{0},
+/*12*/	{ IDT_SW_NTP12_PCIECMDSTS,	IDT_SW_NTP12_PCIELCTLSTS,
+	  IDT_SW_NTP12_NTCTL,
+	  IDT_SW_SWPORT12CTL,		IDT_SW_SWPORT12STS,
+	  { {IDT_SW_NTP12_BARSETUP0,	IDT_SW_NTP12_BARLIMIT0,
+	     IDT_SW_NTP12_BARLTBASE0,	IDT_SW_NTP12_BARUTBASE0},
+	    {IDT_SW_NTP12_BARSETUP1,	IDT_SW_NTP12_BARLIMIT1,
+	     IDT_SW_NTP12_BARLTBASE1,	IDT_SW_NTP12_BARUTBASE1},
+	    {IDT_SW_NTP12_BARSETUP2,	IDT_SW_NTP12_BARLIMIT2,
+	     IDT_SW_NTP12_BARLTBASE2,	IDT_SW_NTP12_BARUTBASE2},
+	    {IDT_SW_NTP12_BARSETUP3,	IDT_SW_NTP12_BARLIMIT3,
+	     IDT_SW_NTP12_BARLTBASE3,	IDT_SW_NTP12_BARUTBASE3},
+	    {IDT_SW_NTP12_BARSETUP4,	IDT_SW_NTP12_BARLIMIT4,
+	     IDT_SW_NTP12_BARLTBASE4,	IDT_SW_NTP12_BARUTBASE4},
+	    {IDT_SW_NTP12_BARSETUP5,	IDT_SW_NTP12_BARLIMIT5,
+	     IDT_SW_NTP12_BARLTBASE5,	IDT_SW_NTP12_BARUTBASE5} } },
+/*13*/	{0},
+/*14*/	{0},
+/*15*/	{0},
+/*16*/	{ IDT_SW_NTP16_PCIECMDSTS,	IDT_SW_NTP16_PCIELCTLSTS,
+	  IDT_SW_NTP16_NTCTL,
+	  IDT_SW_SWPORT16CTL,		IDT_SW_SWPORT16STS,
+	  { {IDT_SW_NTP16_BARSETUP0,	IDT_SW_NTP16_BARLIMIT0,
+	     IDT_SW_NTP16_BARLTBASE0,	IDT_SW_NTP16_BARUTBASE0},
+	    {IDT_SW_NTP16_BARSETUP1,	IDT_SW_NTP16_BARLIMIT1,
+	     IDT_SW_NTP16_BARLTBASE1,	IDT_SW_NTP16_BARUTBASE1},
+	    {IDT_SW_NTP16_BARSETUP2,	IDT_SW_NTP16_BARLIMIT2,
+	     IDT_SW_NTP16_BARLTBASE2,	IDT_SW_NTP16_BARUTBASE2},
+	    {IDT_SW_NTP16_BARSETUP3,	IDT_SW_NTP16_BARLIMIT3,
+	     IDT_SW_NTP16_BARLTBASE3,	IDT_SW_NTP16_BARUTBASE3},
+	    {IDT_SW_NTP16_BARSETUP4,	IDT_SW_NTP16_BARLIMIT4,
+	     IDT_SW_NTP16_BARLTBASE4,	IDT_SW_NTP16_BARUTBASE4},
+	    {IDT_SW_NTP16_BARSETUP5,	IDT_SW_NTP16_BARLIMIT5,
+	     IDT_SW_NTP16_BARLTBASE5,	IDT_SW_NTP16_BARUTBASE5} } },
+/*17*/	{0},
+/*18*/	{0},
+/*19*/	{0},
+/*20*/	{ IDT_SW_NTP20_PCIECMDSTS,	IDT_SW_NTP20_PCIELCTLSTS,
+	  IDT_SW_NTP20_NTCTL,
+	  IDT_SW_SWPORT20CTL,		IDT_SW_SWPORT20STS,
+	  { {IDT_SW_NTP20_BARSETUP0,	IDT_SW_NTP20_BARLIMIT0,
+	     IDT_SW_NTP20_BARLTBASE0,	IDT_SW_NTP20_BARUTBASE0},
+	    {IDT_SW_NTP20_BARSETUP1,	IDT_SW_NTP20_BARLIMIT1,
+	     IDT_SW_NTP20_BARLTBASE1,	IDT_SW_NTP20_BARUTBASE1},
+	    {IDT_SW_NTP20_BARSETUP2,	IDT_SW_NTP20_BARLIMIT2,
+	     IDT_SW_NTP20_BARLTBASE2,	IDT_SW_NTP20_BARUTBASE2},
+	    {IDT_SW_NTP20_BARSETUP3,	IDT_SW_NTP20_BARLIMIT3,
+	     IDT_SW_NTP20_BARLTBASE3,	IDT_SW_NTP20_BARUTBASE3},
+	    {IDT_SW_NTP20_BARSETUP4,	IDT_SW_NTP20_BARLIMIT4,
+	     IDT_SW_NTP20_BARLTBASE4,	IDT_SW_NTP20_BARUTBASE4},
+	    {IDT_SW_NTP20_BARSETUP5,	IDT_SW_NTP20_BARLIMIT5,
+	     IDT_SW_NTP20_BARLTBASE5,	IDT_SW_NTP20_BARUTBASE5} } },
+/*21*/	{0},
+/*22*/	{0},
+/*23*/	{0}
+};
+
+/*
+ * IDT PCIe-switch partitions table with the corresponding control, status
+ * and messages control registers
+ *
+ * The table is indexed by the partition number (0 - 7). Each entry is a
+ * positional initializer listing the SWPARTxCTL and SWPARTxSTS registers of
+ * the partition followed by its four message control registers
+ * SWPxMSGCTL0 - SWPxMSGCTL3. All the entries are IDT_SW_* register
+ * identifiers.
+ */
+static const struct idt_ntb_part partdata_tbl[IDT_MAX_NR_PARTS] = {
+/*0*/	{ IDT_SW_SWPART0CTL,	IDT_SW_SWPART0STS,
+	  {IDT_SW_SWP0MSGCTL0,	IDT_SW_SWP0MSGCTL1,
+	   IDT_SW_SWP0MSGCTL2,	IDT_SW_SWP0MSGCTL3} },
+/*1*/	{ IDT_SW_SWPART1CTL,	IDT_SW_SWPART1STS,
+	  {IDT_SW_SWP1MSGCTL0,	IDT_SW_SWP1MSGCTL1,
+	   IDT_SW_SWP1MSGCTL2,	IDT_SW_SWP1MSGCTL3} },
+/*2*/	{ IDT_SW_SWPART2CTL,	IDT_SW_SWPART2STS,
+	  {IDT_SW_SWP2MSGCTL0,	IDT_SW_SWP2MSGCTL1,
+	   IDT_SW_SWP2MSGCTL2,	IDT_SW_SWP2MSGCTL3} },
+/*3*/	{ IDT_SW_SWPART3CTL,	IDT_SW_SWPART3STS,
+	  {IDT_SW_SWP3MSGCTL0,	IDT_SW_SWP3MSGCTL1,
+	   IDT_SW_SWP3MSGCTL2,	IDT_SW_SWP3MSGCTL3} },
+/*4*/	{ IDT_SW_SWPART4CTL,	IDT_SW_SWPART4STS,
+	  {IDT_SW_SWP4MSGCTL0,	IDT_SW_SWP4MSGCTL1,
+	   IDT_SW_SWP4MSGCTL2,	IDT_SW_SWP4MSGCTL3} },
+/*5*/	{ IDT_SW_SWPART5CTL,	IDT_SW_SWPART5STS,
+	  {IDT_SW_SWP5MSGCTL0,	IDT_SW_SWP5MSGCTL1,
+	   IDT_SW_SWP5MSGCTL2,	IDT_SW_SWP5MSGCTL3} },
+/*6*/	{ IDT_SW_SWPART6CTL,	IDT_SW_SWPART6STS,
+	  {IDT_SW_SWP6MSGCTL0,	IDT_SW_SWP6MSGCTL1,
+	   IDT_SW_SWP6MSGCTL2,	IDT_SW_SWP6MSGCTL3} },
+/*7*/	{ IDT_SW_SWPART7CTL,	IDT_SW_SWPART7STS,
+	  {IDT_SW_SWP7MSGCTL0,	IDT_SW_SWP7MSGCTL1,
+	   IDT_SW_SWP7MSGCTL2,	IDT_SW_SWP7MSGCTL3} }
+};
+
+/*
+ * DebugFS directory to place the driver debug file
+ *
+ * File-scope handle of the driver top-level DebugFS directory. It is not
+ * initialized here, so it stays NULL until assigned by code outside this
+ * part of the file.
+ */
+static struct dentry *dbgfs_topdir;
+
+/*=============================================================================
+ *                1. IDT PCIe-switch registers IO-functions
+ *
+ *    Besides the ordinary configuration space registers, IDT PCIe-switches
+ * expose global configuration registers, which are used to determine the
+ * state of other device ports as well as to get notified of some
+ * switch-related events. Additionally all the configuration space registers
+ * of all the IDT PCIe-switch functions are mapped to the Global Address
+ * space, so each function can determine the configuration of any other
+ * PCI-function.
+ *    Functions declared in this chapter are created to encapsulate access
+ * to configuration and global registers, so the driver code just needs to
+ * provide an IDT NTB hardware descriptor and a register address.
+ *=============================================================================
+ */
+
+/*
+ * idt_nt_write() - store a 32-bit value to a PCI configuration space register
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Offset of the register to be written
+ * @data:	Value to put to the register
+ *
+ * The register is accessed over the ndev->cfgspc mapping. IDT PCIe-switch
+ * registers are all Little endian.
+ *
+ * An offset exceeding IDT_REG_PCI_MAX or not aligned to IDT_REG_ALIGN is
+ * reported with a warning and nothing is written in that case.
+ */
+static void idt_nt_write(struct idt_ntb_dev *ndev,
+			 const unsigned int reg, const u32 data)
+{
+	/* Perform the write only if the offset passes the sanity check */
+	if (!WARN_ON(reg > IDT_REG_PCI_MAX || !IS_ALIGNED(reg, IDT_REG_ALIGN)))
+		iowrite32(data, ndev->cfgspc + (ptrdiff_t)reg);
+}
+
+/*
+ * idt_nt_read() - load a 32-bit value of a PCI configuration space register
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Offset of the register to read data from
+ *
+ * The register is accessed over the ndev->cfgspc mapping. IDT PCIe-switch
+ * registers are all Little endian.
+ *
+ * Return: register value, or all-ones (with a warning printed) if the offset
+ * exceeds IDT_REG_PCI_MAX or isn't aligned to IDT_REG_ALIGN.
+ */
+static u32 idt_nt_read(struct idt_ntb_dev *ndev, const unsigned int reg)
+{
+	bool bad_reg;
+
+	/* Neither too big nor unaligned offsets are expected here */
+	bad_reg = reg > IDT_REG_PCI_MAX || !IS_ALIGNED(reg, IDT_REG_ALIGN);
+	if (WARN_ON(bad_reg))
+		return ~0;
+
+	return ioread32(ndev->cfgspc + (ptrdiff_t)reg);
+}
+
+/*
+ * idt_sw_write() - store a 32-bit value to a Global register
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Global address of the register to be written
+ * @data:	Value to put to the register
+ *
+ * Global registers are accessed indirectly: the register address is put to
+ * IDT_NT_GASAADDR and then the value is passed over IDT_NT_GASADATA. Both
+ * accesses are done with ndev->gasa_lock held and local IRQs disabled, so
+ * the address/data pair can't be interleaved with another GASA operation.
+ *
+ * IDT PCIe-switch Global configuration registers are all Little endian.
+ *
+ * An address exceeding IDT_REG_SW_MAX or not aligned to IDT_REG_ALIGN is
+ * reported with a warning and nothing is written in that case.
+ */
+static void idt_sw_write(struct idt_ntb_dev *ndev,
+			 const unsigned int reg, const u32 data)
+{
+	void __iomem *gasa_addr = ndev->cfgspc + (ptrdiff_t)IDT_NT_GASAADDR;
+	void __iomem *gasa_data = ndev->cfgspc + (ptrdiff_t)IDT_NT_GASADATA;
+	unsigned long flags;
+
+	/* Neither too big nor unaligned addresses are expected here */
+	if (WARN_ON(reg > IDT_REG_SW_MAX || !IS_ALIGNED(reg, IDT_REG_ALIGN)))
+		return;
+
+	spin_lock_irqsave(&ndev->gasa_lock, flags);
+	/* Select the global register first, then pass the new value */
+	iowrite32((u32)reg, gasa_addr);
+	iowrite32(data, gasa_data);
+	/* Make sure the PCIe transactions are executed */
+	mmiowb();
+	spin_unlock_irqrestore(&ndev->gasa_lock, flags);
+}
+
+/*
+ * idt_sw_read() - load a 32-bit value of a Global register
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Global address of the register to read data from
+ *
+ * Global registers are accessed indirectly: the register address is put to
+ * IDT_NT_GASAADDR and then the value is fetched from IDT_NT_GASADATA. Both
+ * accesses are done with ndev->gasa_lock held and local IRQs disabled, so
+ * the address/data pair can't be interleaved with another GASA operation.
+ *
+ * IDT PCIe-switch Global configuration registers are all Little endian.
+ *
+ * Return: register value, or all-ones (with a warning printed) if the address
+ * exceeds IDT_REG_SW_MAX or isn't aligned to IDT_REG_ALIGN.
+ */
+static u32 idt_sw_read(struct idt_ntb_dev *ndev, const unsigned int reg)
+{
+	void __iomem *gasa_addr = ndev->cfgspc + (ptrdiff_t)IDT_NT_GASAADDR;
+	void __iomem *gasa_data = ndev->cfgspc + (ptrdiff_t)IDT_NT_GASADATA;
+	unsigned long flags;
+	u32 val;
+
+	/* Neither too big nor unaligned addresses are expected here */
+	if (WARN_ON(reg > IDT_REG_SW_MAX || !IS_ALIGNED(reg, IDT_REG_ALIGN)))
+		return ~0;
+
+	spin_lock_irqsave(&ndev->gasa_lock, flags);
+	/* Select the global register first */
+	iowrite32((u32)reg, gasa_addr);
+	/* Fetch the register data (read ops acts as MMIO barrier) */
+	val = ioread32(gasa_data);
+	spin_unlock_irqrestore(&ndev->gasa_lock, flags);
+
+	return val;
+}
+
+/*
+ * idt_reg_set_bits() - set bits of a passed register
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Register to change bits of
+ * @reg_lock:	Register access spin lock
+ * @valid_mask:	Mask of valid bits
+ * @set_bits:	Bitmask to set
+ *
+ * Helper method, which makes sure the passed bitfield fits the mask of valid
+ * bits and then performs a read-modify-write cycle of the register with
+ * @reg_lock held and local IRQs disabled.
+ *
+ * WARNING! Make sure the passed register isn't concurrently modified over the
+ * plain idt_nt_write() method (read method is ok to be used concurrently).
+ *
+ * Return: zero on success, -EINVAL if @set_bits has bits out of @valid_mask.
+ */
+static inline int idt_reg_set_bits(struct idt_ntb_dev *ndev, unsigned int reg,
+				   spinlock_t *reg_lock,
+				   u64 valid_mask, u64 set_bits)
+{
+	unsigned long flags;
+	u32 val;
+
+	/* All the requested bits must be covered by the valid mask */
+	if ((set_bits & valid_mask) != set_bits)
+		return -EINVAL;
+
+	/* Keep the register locked until the change is written back */
+	spin_lock_irqsave(reg_lock, flags);
+	val = idt_nt_read(ndev, reg);
+	val |= (u32)set_bits;
+	idt_nt_write(ndev, reg, val);
+	spin_unlock_irqrestore(reg_lock, flags);
+
+	return 0;
+}
+
+/*
+ * idt_reg_clear_bits() - clear bits of a passed register
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @reg:	Register to change bits of
+ * @reg_lock:	Register access spin lock
+ * @clear_bits:	Bitmask to clear
+ *
+ * Helper method, which performs a read-modify-write cycle of the register
+ * with @reg_lock held and local IRQs disabled.
+ *
+ * NOTE! Invalid bits are always considered cleared so it's not an error
+ * to clear them over. That's why there is no valid mask check here.
+ *
+ * WARNING! Make sure the passed register isn't concurrently modified over the
+ * plain idt_nt_write() method (read method is ok to use concurrently).
+ */
+static inline void idt_reg_clear_bits(struct idt_ntb_dev *ndev,
+				     unsigned int reg, spinlock_t *reg_lock,
+				     u64 clear_bits)
+{
+	unsigned long flags;
+	u32 val;
+
+	/* Keep the register locked until the change is written back */
+	spin_lock_irqsave(reg_lock, flags);
+	val = idt_nt_read(ndev, reg);
+	val &= ~(u32)clear_bits;
+	idt_nt_write(ndev, reg, val);
+	spin_unlock_irqrestore(reg_lock, flags);
+}
+
+/*===========================================================================
+ *                           2. Ports operations
+ *
+ *    IDT PCIe-switches can have from 3 up to 8 ports with possible
+ * NT-functions enabled. So all the possible ports need to be scanned looking
+ * for NTB activated. NTB API will have enumerated only the ports with NTB.
+ *===========================================================================
+ */
+
+/*
+ * idt_scan_ports() - scan IDT PCIe-switch ports collecting info in the tables
+ * @ndev:	Pointer to the PCI device descriptor
+ *
+ * The method determines the local port and partition numbers, then walks over
+ * all the ports listed in ndev->swcfg looking for the ones, which have
+ * NT-function enabled and belong to an active partition. Each port found is
+ * saved in ndev->peers and in the port/partition -> peer index maps.
+ *
+ * Port and partition numbers retrieved from the hardware are used as indexes
+ * of the static register tables, so they are checked against the tables size
+ * before being used.
+ *
+ * Return: zero on success, -ENODEV if the local port number is invalid or
+ * there is no active peer found.
+ */
+static int idt_scan_ports(struct idt_ntb_dev *ndev)
+{
+	unsigned char pidx, port, part;
+	u32 data, portsts, partsts;
+
+	/* Retrieve the local port number */
+	data = idt_nt_read(ndev, IDT_NT_PCIELCAP);
+	ndev->port = GET_FIELD(PCIELCAP_PORTNUM, data);
+
+	/* Don't let a bogus hardware value index beyond the ports table */
+	if (ndev->port >= ARRAY_SIZE(portdata_tbl)) {
+		dev_err(&ndev->ntb.pdev->dev,
+			"Invalid local port number %hhu\n", ndev->port);
+		return -ENODEV;
+	}
+
+	/* Retrieve the local partition number */
+	portsts = idt_sw_read(ndev, portdata_tbl[ndev->port].sts);
+	ndev->part = GET_FIELD(SWPORTxSTS_SWPART, portsts);
+
+	/*
+	 * Initialize port/partition -> index tables with invalid values
+	 * (memset fills the tables bytewise with the low byte of -EINVAL)
+	 */
+	memset(ndev->port_idx_map, -EINVAL, sizeof(ndev->port_idx_map));
+	memset(ndev->part_idx_map, -EINVAL, sizeof(ndev->part_idx_map));
+
+	/*
+	 * Walk over all the possible ports checking whether any of them has
+	 * NT-function activated
+	 */
+	ndev->peer_cnt = 0;
+	for (pidx = 0; pidx < ndev->swcfg->port_cnt; pidx++) {
+		port = ndev->swcfg->ports[pidx];
+		/* Skip local port */
+		if (port == ndev->port)
+			continue;
+
+		/* Read the port status register to get its partition */
+		portsts = idt_sw_read(ndev, portdata_tbl[port].sts);
+		part = GET_FIELD(SWPORTxSTS_SWPART, portsts);
+
+		/* Skip the port if its partition doesn't fit the tables */
+		if (part >= ARRAY_SIZE(partdata_tbl))
+			continue;
+
+		/* Retrieve the partition status */
+		partsts = idt_sw_read(ndev, partdata_tbl[part].sts);
+		/* Check if partition state is active and port has NTB */
+		if (IS_FLD_SET(SWPARTxSTS_STATE, partsts, ACT) &&
+		    (IS_FLD_SET(SWPORTxSTS_MODE, portsts, NT) ||
+		     IS_FLD_SET(SWPORTxSTS_MODE, portsts, USNT) ||
+		     IS_FLD_SET(SWPORTxSTS_MODE, portsts, USNTDMA) ||
+		     IS_FLD_SET(SWPORTxSTS_MODE, portsts, NTDMA))) {
+			/* Save the port and partition numbers */
+			ndev->peers[ndev->peer_cnt].port = port;
+			ndev->peers[ndev->peer_cnt].part = part;
+			/* Fill in the port/partition -> index tables */
+			ndev->port_idx_map[port] = ndev->peer_cnt;
+			ndev->part_idx_map[part] = ndev->peer_cnt;
+			ndev->peer_cnt++;
+		}
+	}
+
+	dev_dbg(&ndev->ntb.pdev->dev, "Local port: %hhu, num of peers: %hhu\n",
+		ndev->port, ndev->peer_cnt);
+
+	/* It's useless to have this driver loaded if there is no any peer */
+	if (ndev->peer_cnt == 0) {
+		dev_warn(&ndev->ntb.pdev->dev, "No active peer found\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * idt_ntb_port_number() - get the local port number
+ * @ntb:	NTB device context.
+ *
+ * Return: the port number saved in the IDT NTB device descriptor
+ */
+static int idt_ntb_port_number(struct ntb_dev *ntb)
+{
+	return to_ndev_ntb(ntb)->port;
+}
+
+/*
+ * idt_ntb_peer_port_count() - get the number of peer ports
+ * @ntb:	NTB device context.
+ *
+ * Return: the count of peer NT-functions saved in the IDT NTB device
+ * descriptor
+ */
+static int idt_ntb_peer_port_count(struct ntb_dev *ntb)
+{
+	return to_ndev_ntb(ntb)->peer_cnt;
+}
+
+/*
+ * idt_ntb_peer_port_number() - get peer port by given index
+ * @ntb:	NTB device context.
+ * @pidx:	Peer port index.
+ *
+ * Return: port number of the peer NT-function with the passed index, or
+ * -EINVAL if the index is negative or isn't less than the number of peers
+ */
+static int idt_ntb_peer_port_number(struct ntb_dev *ntb, int pidx)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	/* Only indexes of the detected peers are accepted */
+	if (pidx >= 0 && pidx < ndev->peer_cnt)
+		return ndev->peers[pidx].port;
+
+	return -EINVAL;
+}
+
+/*
+ * idt_ntb_peer_port_idx() - get peer port index by given port number
+ * @ntb:	NTB device context.
+ * @port:	Peer port number.
+ *
+ * Internal port -> index table is pre-initialized with -EINVAL values,
+ * so we just need to return its value.
+ *
+ * Return: peer NT-function port index, or -EINVAL if the port number is out
+ * of range or no peer is mapped to it
+ */
+static int idt_ntb_peer_port_idx(struct ntb_dev *ntb, int port)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	if (port < 0 || IDT_MAX_NR_PORTS <= port)
+		return -EINVAL;
+
+	/*
+	 * The table is filled bytewise (by memset) with the low byte of
+	 * -EINVAL, so the entry must be interpreted as a signed 8-bit value
+	 * to get the error code back. The explicit conversion makes the
+	 * result independent of the element type signedness.
+	 * NOTE(review): presumably the table elements are plain chars -
+	 * confirm against the structure declaration.
+	 */
+	return (s8)ndev->port_idx_map[port];
+}
+
+/*===========================================================================
+ *                         3. Link status operations
+ *    There is no ready-to-use method to have peer ports notified when an NTB
+ * link is set up or goes down. A global signal can be used instead.
+ * If any of the ports changes its local NTB link state, it sends the
+ * global signal and clears the corresponding global state bit. Then all the
+ * ports receive a notification of that, which makes the client driver aware
+ * of a possible NTB link change.
+ *    Additionally each of active NT-functions is subscribed to PCIe-link
+ * state changes of peer ports.
+ *===========================================================================
+ */
+
+static void idt_ntb_local_link_disable(struct idt_ntb_dev *ndev);
+
+/*
+ * idt_init_link() - Initialize NTB link state notification subsystem
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Function performs the basic initialization of some global registers
+ * needed to enable IRQ-based notifications of PCIe Link Up/Down and
+ * Global Signal events. It also initializes the spin lock protecting the
+ * Mapping Table access registers.
+ * NOTE Since it's not possible to determine when all the NTB peer drivers are
+ * unloaded as well as have those registers accessed concurrently, we must
+ * preinitialize them with the same value and leave it uncleared on local
+ * driver unload.
+ */
+static void idt_init_link(struct idt_ntb_dev *ndev)
+{
+	u32 part_mask, port_mask, se_mask;
+	unsigned char pidx;
+
+	/* Initialize spin locker of Mapping Table access registers */
+	spin_lock_init(&ndev->mtbl_lock);
+
+	/*
+	 * Walk over all detected peers collecting port and partition masks.
+	 * A cleared bit corresponds to the local or a peer port/partition.
+	 */
+	port_mask = ~BIT(ndev->port);
+	part_mask = ~BIT(ndev->part);
+	for (pidx = 0; pidx < ndev->peer_cnt; pidx++) {
+		port_mask &= ~BIT(ndev->peers[pidx].port);
+		part_mask &= ~BIT(ndev->peers[pidx].part);
+	}
+
+	/* Clean the Link Up/Down and Global Signal status registers */
+	idt_sw_write(ndev, IDT_SW_SELINKUPSTS, (u32)-1);
+	idt_sw_write(ndev, IDT_SW_SELINKDNSTS, (u32)-1);
+	idt_sw_write(ndev, IDT_SW_SEGSIGSTS, (u32)-1);
+
+	/* Unmask NT-activated partitions to receive Global Switch events */
+	idt_sw_write(ndev, IDT_SW_SEPMSK, part_mask);
+
+	/* Enable PCIe Link Up events of NT-activated ports */
+	idt_sw_write(ndev, IDT_SW_SELINKUPMSK, port_mask);
+
+	/* Enable PCIe Link Down events of NT-activated ports */
+	idt_sw_write(ndev, IDT_SW_SELINKDNMSK, port_mask);
+
+	/* Unmask NT-activated partitions to receive Global Signal events */
+	idt_sw_write(ndev, IDT_SW_SEGSIGMSK, part_mask);
+
+	/* Unmask Link Up/Down and Global Switch Events */
+	se_mask = ~(IDT_SEMSK_LINKUP | IDT_SEMSK_LINKDN | IDT_SEMSK_GSIGNAL);
+	idt_sw_write(ndev, IDT_SW_SEMSK, se_mask);
+
+	/* Kernel log messages are expected to be newline-terminated */
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB link status events initialized\n");
+}
+
+/*
+ * idt_deinit_link() - deinitialize link subsystem
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Just disable the local NTB link back. The global registers are left
+ * untouched here.
+ */
+static void idt_deinit_link(struct idt_ntb_dev *ndev)
+{
+	/* Disable the link */
+	idt_ntb_local_link_disable(ndev);
+
+	/* Kernel log messages are expected to be newline-terminated */
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB link status events deinitialized\n");
+}
+
+/*
+ * idt_se_isr() - switch events ISR
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @ntint_sts:	NT-function interrupt status
+ *
+ * This driver doesn't support IDT PCIe-switch dynamic reconfigurations,
+ * Failover capability, etc, so switch events are utilized to notify of
+ * PCIe and NTB link events.
+ * The method is called from PCIe ISR bottom-half routine.
+ *
+ * The passed interrupt status is used for the debug output only. The switch
+ * events status is read, the Link Up/Down and Global Signal statuses are
+ * cleared, and the client driver is unconditionally notified of a possible
+ * link state change.
+ */
+static void idt_se_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
+{
+	u32 sests;
+
+	/* Read Switch Events status */
+	sests = idt_sw_read(ndev, IDT_SW_SESTS);
+
+	/* Clean the Link Up/Down and Global Signal status registers */
+	idt_sw_write(ndev, IDT_SW_SELINKUPSTS, (u32)-1);
+	idt_sw_write(ndev, IDT_SW_SELINKDNSTS, (u32)-1);
+	idt_sw_write(ndev, IDT_SW_SEGSIGSTS, (u32)-1);
+
+	/* Clean the corresponding interrupt bit */
+	idt_nt_write(ndev, IDT_NT_NTINTSTS, IDT_NTINTSTS_SEVENT);
+
+	/* Kernel log messages are expected to be newline-terminated */
+	dev_dbg(&ndev->ntb.pdev->dev, "SE IRQ detected %#08x (SESTS %#08x)\n",
+		ntint_sts, sests);
+
+	/* Notify the client driver of possible link state change */
+	ntb_link_event(&ndev->ntb);
+}
+
+/*
+ * idt_ntb_local_link_enable() - enable the local NTB link.
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * The local NTB link gets enabled by the following steps:
+ * - Completion TLPs translation is switched on in NTCTL
+ * - Mapping table entry of the local partition is filled in with the local
+ *   Requester ID and marked valid to enable the Request ID translation
+ * - Peers are notified of the NTB link state change over the global signal
+ */
+static void idt_ntb_local_link_enable(struct idt_ntb_dev *ndev)
+{
+	unsigned long flags;
+	u32 reqid, entry;
+
+	/* Enable the ID protection and Completion TLPs translation */
+	idt_nt_write(ndev, IDT_NT_NTCTL, IDT_NTCTL_CPEN);
+
+	/* Retrieve the current Requester ID (Bus:Device:Function) */
+	reqid = idt_nt_read(ndev, IDT_NT_REQIDCAP);
+
+	/* Compose the valid entry with the Requester ID and partition */
+	entry = SET_FIELD(NTMTBLDATA_REQID, 0, reqid) |
+		SET_FIELD(NTMTBLDATA_PART, 0, ndev->part) |
+		IDT_NTMTBLDATA_VALID;
+
+	/*
+	 * Put the entry to the NT Mapping table at the local partition
+	 * index. The address/data registers pair is protected by the lock.
+	 */
+	spin_lock_irqsave(&ndev->mtbl_lock, flags);
+	idt_nt_write(ndev, IDT_NT_NTMTBLADDR, ndev->part);
+	idt_nt_write(ndev, IDT_NT_NTMTBLDATA, entry);
+	mmiowb();
+	spin_unlock_irqrestore(&ndev->mtbl_lock, flags);
+
+	/* Notify the peers by setting and clearing the global signal bit */
+	idt_nt_write(ndev, IDT_NT_NTGSIGNAL, IDT_NTGSIGNAL_SET);
+	idt_sw_write(ndev, IDT_SW_SEGSIGSTS, (u32)1 << ndev->part);
+}
+
+/*
+ * idt_ntb_local_link_disable() - disable the local NTB link.
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * The local NTB link gets disabled by the following steps:
+ * - Completion TLPs translation is switched off in NTCTL
+ * - Mapping table entry of the local partition is cleared
+ * - Peers are notified of the NTB link state change over the global signal
+ */
+static void idt_ntb_local_link_disable(struct idt_ntb_dev *ndev)
+{
+	unsigned long flags;
+
+	/* Disable Completion TLPs translation */
+	idt_nt_write(ndev, IDT_NT_NTCTL, 0);
+
+	/*
+	 * Zero the NT Mapping table entry at the local partition index.
+	 * The address/data registers pair is protected by the lock.
+	 */
+	spin_lock_irqsave(&ndev->mtbl_lock, flags);
+	idt_nt_write(ndev, IDT_NT_NTMTBLADDR, ndev->part);
+	idt_nt_write(ndev, IDT_NT_NTMTBLDATA, 0);
+	mmiowb();
+	spin_unlock_irqrestore(&ndev->mtbl_lock, flags);
+
+	/* Notify the peers by setting and clearing the global signal bit */
+	idt_nt_write(ndev, IDT_NT_NTGSIGNAL, IDT_NTGSIGNAL_SET);
+	idt_sw_write(ndev, IDT_SW_SEGSIGSTS, (u32)1 << ndev->part);
+}
+
+/*
+ * idt_ntb_local_link_is_up() - test whether local NTB link is up
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Local link is up under the following conditions:
+ * - Bus mastering is enabled
+ * - NTCTL has Completion TLPs translation enabled
+ * - Mapping table permits Request TLPs translation
+ * The conditions are checked in that order and the check stops at the first
+ * one, which isn't met.
+ * NOTE: We don't need to check PCIe link state since it's obviously
+ * up while we are able to communicate with IDT PCIe-switch
+ *
+ * Return: true if link is up, otherwise false
+ */
+static bool idt_ntb_local_link_is_up(struct idt_ntb_dev *ndev)
+{
+	unsigned long flags;
+	u32 entry;
+
+	/* The local Bus Master Enable flag must be set */
+	if (!(idt_nt_read(ndev, IDT_NT_PCICMDSTS) & IDT_PCICMDSTS_BME))
+		return false;
+
+	/* The local Completion TLPs translation must be enabled */
+	if (!(idt_nt_read(ndev, IDT_NT_NTCTL) & IDT_NTCTL_CPEN))
+		return false;
+
+	/* Fetch Mapping table entry corresponding to the local partition */
+	spin_lock_irqsave(&ndev->mtbl_lock, flags);
+	idt_nt_write(ndev, IDT_NT_NTMTBLADDR, ndev->part);
+	entry = idt_nt_read(ndev, IDT_NT_NTMTBLDATA);
+	spin_unlock_irqrestore(&ndev->mtbl_lock, flags);
+
+	return (entry & IDT_NTMTBLDATA_VALID) != 0;
+}
+
+/*
+ * idt_ntb_peer_link_is_up() - test whether peer NTB link is up
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @pidx:	Peer port index
+ *
+ * Peer link is up under the following conditions:
+ * - PCIe link is up
+ * - Bus mastering is enabled
+ * - NTCTL has Completion TLPs translation enabled
+ * - Mapping table permits Request TLPs translation
+ * The conditions are checked in that order and the check stops at the first
+ * one, which isn't met. The passed index isn't validated here.
+ *
+ * Return: true if link is up, otherwise false
+ */
+static bool idt_ntb_peer_link_is_up(struct idt_ntb_dev *ndev, int pidx)
+{
+	const struct idt_ntb_port *regs;
+	unsigned long flags;
+	u32 entry;
+
+	/* Registers of the peer port are looked up by its port number */
+	regs = &portdata_tbl[ndev->peers[pidx].port];
+
+	/* PCIe link of the peer port must be up */
+	if (!(idt_sw_read(ndev, regs->sts) & IDT_SWPORTxSTS_LINKUP))
+		return false;
+
+	/* Bus mastering must be enabled on the peer port */
+	if (!(idt_sw_read(ndev, regs->pcicmdsts) & IDT_PCICMDSTS_BME))
+		return false;
+
+	/* Completion TLPs translation must be enabled on the peer port */
+	if (!(idt_sw_read(ndev, regs->ntctl) & IDT_NTCTL_CPEN))
+		return false;
+
+	/* Fetch Mapping table entry corresponding to the peer partition */
+	spin_lock_irqsave(&ndev->mtbl_lock, flags);
+	idt_nt_write(ndev, IDT_NT_NTMTBLADDR, ndev->peers[pidx].part);
+	entry = idt_nt_read(ndev, IDT_NT_NTMTBLDATA);
+	spin_unlock_irqrestore(&ndev->mtbl_lock, flags);
+
+	return (entry & IDT_NTMTBLDATA_VALID) != 0;
+}
+
+/*
+ * idt_ntb_link_is_up() - get the current ntb link state (NTB API callback)
+ * @ntb:	NTB device context.
+ * @speed:	OUT - The link speed expressed as PCIe generation number.
+ * @width:	OUT - The link width expressed as the number of PCIe lanes.
+ *
+ * Get the bitfield of NTB link states for all peer ports
+ *
+ * Return: bitfield of indexed ports link state: bit is set/cleared if the
+ *         link is up/down respectively.
+ */
+static u64 idt_ntb_link_is_up(struct ntb_dev *ntb,
+			      enum ntb_speed *speed, enum ntb_width *width)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	unsigned char idx;
+	u64 links = 0;
+	u32 lctlsts;
+
+	/* Local link speed/width are read only if the caller asked for them */
+	if (speed || width) {
+		lctlsts = idt_nt_read(ndev, IDT_NT_PCIELCTLSTS);
+		if (speed)
+			*speed = GET_FIELD(PCIELCTLSTS_CLS, lctlsts);
+		if (width)
+			*width = GET_FIELD(PCIELCTLSTS_NLW, lctlsts);
+	}
+
+	/* With the local NTB link down every peer is reported as down */
+	if (!idt_ntb_local_link_is_up(ndev))
+		return links;
+
+	/* Set one bit per peer index whose link checks all pass */
+	for (idx = 0; idx < ndev->peer_cnt; idx++) {
+		if (!idt_ntb_peer_link_is_up(ndev, idx))
+			continue;
+		links |= (u64)1 << idx;
+	}
+
+	return links;
+}
+
+/*
+ * idt_ntb_link_enable() - enable local port ntb link (NTB API callback)
+ * @ntb:	NTB device context.
+ * @speed:	The maximum link speed expressed as PCIe generation number.
+ * @width:	The maximum link width expressed as the number of PCIe lanes.
+ *
+ * Enable just local NTB link. @speed and @width are ignored.
+ *
+ * Return: always zero.
+ */
+static int idt_ntb_link_enable(struct ntb_dev *ntb, enum ntb_speed speed,
+			       enum ntb_width width)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	/* Just enable the local NTB link */
+	idt_ntb_local_link_enable(ndev);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "Local NTB link enabled");
+
+	return 0;
+}
+
+/*
+ * idt_ntb_link_disable() - disable local port ntb link (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * Disable just local NTB link.
+ *
+ * Return: always zero.
+ */
+static int idt_ntb_link_disable(struct ntb_dev *ntb)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	struct device *dev = &ndev->ntb.pdev->dev;
+
+	/* Only the local NTB link is switched off */
+	idt_ntb_local_link_disable(ndev);
+	dev_dbg(dev, "Local NTB link disabled");
+
+	return 0;
+}
+
+/*=============================================================================
+ *                         4. Memory Window operations
+ *
+ *    IDT PCIe-switches have two types of memory windows: MWs with direct
+ * address translation and MWs with LUT based translation. The first type of
+ * MWs is simple map of corresponding BAR address space to a memory space
+ * of specified target port. So it implements just one-to-one mapping. Lookup
+ * table in its turn can map one BAR address space to up to 24 different
+ * memory spaces of different ports.
+ *    NT-functions BARs can be turned on to implement either direct or lookup
+ * table based address translations, so:
+ * BAR0 - NT configuration registers space/direct address translation
+ * BAR1 - direct address translation/upper address of BAR0x64
+ * BAR2 - direct address translation/Lookup table with either 12 or 24 entries
+ * BAR3 - direct address translation/upper address of BAR2x64
+ * BAR4 - direct address translation/Lookup table with either 12 or 24 entries
+ * BAR5 - direct address translation/upper address of BAR4x64
+ *    Additionally BAR2 and BAR4 can't have 24-entries LUT enabled at the same
+ * time. Since the BARs setup can be rather complicated this driver implements
+ * a scanning algorithm to have all the possible memory windows configuration
+ * covered.
+ *
+ * NOTE 1 BAR setup must be done before Linux kernel enumerated NT-function
+ * of any port, so this driver would have memory windows configurations fixed.
+ * In this way all initializations must be performed either by platform BIOS
+ * or using EEPROM connected to IDT PCIe-switch master SMBus.
+ *
+ * NOTE 2 This driver expects BAR0 mapping NT-function configuration space.
+ * Easy calculation can give us an upper boundary of 29 possible memory windows
+ * per each NT-function if all the BARs are of 32bit type.
+ *=============================================================================
+ */
+
+/*
+ * idt_get_mw_count() - get memory window count
+ * @mw_type:	Memory window type
+ *
+ * Return: number of memory windows with respect to the BAR type
+ */
+static inline unsigned char idt_get_mw_count(enum idt_mw_type mw_type)
+{
+	/* A direct window is a single entry, LUT ones carry 12 or 24 */
+	if (mw_type == IDT_MW_DIR)
+		return 1;
+
+	if (mw_type == IDT_MW_LUT12)
+		return 12;
+
+	if (mw_type == IDT_MW_LUT24)
+		return 24;
+
+	/* Any other translation type exposes no memory windows */
+	return 0;
+}
+
+/*
+ * idt_get_mw_name() - get memory window name
+ * @mw_type:	Memory window type
+ *
+ * Return: pointer to a string with name
+ */
+static inline char *idt_get_mw_name(enum idt_mw_type mw_type)
+{
+	/* Known names are all five characters long ("DIR" is padded) */
+	if (mw_type == IDT_MW_DIR)
+		return "DIR  ";
+
+	if (mw_type == IDT_MW_LUT12)
+		return "LUT12";
+
+	if (mw_type == IDT_MW_LUT24)
+		return "LUT24";
+
+	/* Type the driver has no label for */
+	return "unknown";
+}
+
+/*
+ * idt_scan_mws() - scan memory windows of the port
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @port:	Port to scan the BAR setup registers of
+ * @mw_cnt:	Out - number of memory windows
+ *
+ * It walks over BAR setup registers of the specified port and determines
+ * the memory windows parameters if any activated.
+ *
+ * Return: devm-allocated array of memory windows, ERR_PTR() on failure
+ */
+static struct idt_mw_cfg *idt_scan_mws(struct idt_ntb_dev *ndev, int port,
+				       unsigned char *mw_cnt)
+{
+	struct idt_mw_cfg mws[IDT_MAX_NR_MWS], *ret_mws;
+	const struct idt_ntb_bar *bars;
+	enum idt_mw_type mw_type;
+	unsigned char widx, bidx, en_cnt;
+	bool bar_64bit = false;
+	u64 aprt_size;
+	u32 data;
+
+	/* Retrieve the array of the BARs registers */
+	bars = portdata_tbl[port].bars;
+
+	/* Scan all the BARs belonging to the port */
+	*mw_cnt = 0;
+	for (bidx = 0; bidx < IDT_BAR_CNT; bidx += 1 + bar_64bit) {
+		/* Read BARSETUP register value */
+		data = idt_sw_read(ndev, bars[bidx].setup);
+
+		/* Skip disabled BARs */
+		if (!(data & IDT_BARSETUP_EN)) {
+			bar_64bit = false;
+			continue;
+		}
+
+		/* Skip next BARSETUP if current one has 64bit addressing */
+		bar_64bit = IS_FLD_SET(BARSETUP_TYPE, data, 64);
+
+		/* Skip configuration space mapping BARs */
+		if (data & IDT_BARSETUP_MODE_CFG)
+			continue;
+
+		/* MW type, entries count and aperture size kept in 64 bits */
+		mw_type = GET_FIELD(BARSETUP_ATRAN, data);
+		en_cnt = idt_get_mw_count(mw_type);
+		aprt_size = (u64)1 << GET_FIELD(BARSETUP_SIZE, data);
+
+		/* Save configurations of all available memory windows */
+		for (widx = 0; widx < en_cnt; widx++, (*mw_cnt)++) {
+			/*
+			 * IDT can expose a limited number of MWs, so it's bug
+			 * to have more than the driver expects
+			 */
+			if (*mw_cnt >= IDT_MAX_NR_MWS)
+				return ERR_PTR(-EINVAL);
+
+			/* Save basic MW info */
+			mws[*mw_cnt].type = mw_type;
+			mws[*mw_cnt].bar = bidx;
+			mws[*mw_cnt].idx = widx;
+			/* It's always DWORD aligned */
+			mws[*mw_cnt].addr_align = IDT_TRANS_ALIGN;
+			/* DIR takes the whole aperture, LUT12/24 a 1/16, 1/32 */
+			if (mw_type == IDT_MW_DIR)
+				mws[*mw_cnt].size_max = aprt_size;
+			else if (mw_type == IDT_MW_LUT12)
+				mws[*mw_cnt].size_max = aprt_size / 16;
+			else
+				mws[*mw_cnt].size_max = aprt_size / 32;
+			mws[*mw_cnt].size_align = (mw_type == IDT_MW_DIR) ?
+				IDT_DIR_SIZE_ALIGN : mws[*mw_cnt].size_max;
+		}
+	}
+
+	/* devm_kcalloc() reports failure with NULL, never with ERR_PTR() */
+	ret_mws = devm_kcalloc(&ndev->ntb.pdev->dev, *mw_cnt,
+				sizeof(*ret_mws), GFP_KERNEL);
+	if (!ret_mws)
+		return ERR_PTR(-ENOMEM);
+
+	/* Copy the info of detected memory windows */
+	memcpy(ret_mws, mws, (*mw_cnt)*sizeof(*ret_mws));
+
+	return ret_mws;
+}
+
+/*
+ * idt_init_mws() - initialize memory windows subsystem
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Scan BAR setup registers of local and peer ports to determine the
+ * outbound and inbound memory windows parameters
+ *
+ * Return: zero on success, otherwise a negative error number
+ */
+static int idt_init_mws(struct idt_ntb_dev *ndev)
+{
+	struct device *dev = &ndev->ntb.pdev->dev;
+	struct idt_ntb_peer *peer;
+	unsigned char idx;
+
+	/* Local port goes first, the result is kept in the device itself */
+	ndev->mws = idt_scan_mws(ndev, ndev->port, &ndev->mw_cnt);
+	if (IS_ERR(ndev->mws)) {
+		dev_err(dev, "Failed to scan mws of local port %hhu",
+			ndev->port);
+		return PTR_ERR(ndev->mws);
+	}
+
+	/* Then every peer port, each one keeping its own MWs array */
+	for (idx = 0; idx < ndev->peer_cnt; idx++) {
+		peer = &ndev->peers[idx];
+		peer->mws = idt_scan_mws(ndev, peer->port, &peer->mw_cnt);
+		if (IS_ERR(peer->mws)) {
+			dev_err(dev, "Failed to scan mws of port %hhu",
+				peer->port);
+			return PTR_ERR(peer->mws);
+		}
+	}
+
+	/* LUT registers get their own spin lock */
+	spin_lock_init(&ndev->lut_lock);
+	dev_dbg(dev, "Outbound and inbound MWs initialized");
+
+	return 0;
+}
+
+/*
+ * idt_ntb_mw_count() - number of inbound memory windows (NTB API callback)
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
+ *
+ * The value is returned for the specified peer, so generally speaking it can
+ * be different for different port depending on the IDT PCIe-switch
+ * initialization.
+ *
+ * Return: the number of memory windows.
+ */
+static int idt_ntb_mw_count(struct ntb_dev *ntb, int pidx)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	if (pidx >= 0 && pidx < ndev->peer_cnt)
+		return ndev->peers[pidx].mw_cnt;
+
+	return -EINVAL;
+}
+
+/*
+ * idt_ntb_mw_get_align() - inbound memory window parameters (NTB API callback)
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
+ * @addr_align:	OUT - the base alignment for translating the memory window
+ * @size_align:	OUT - the size alignment for translating the memory window
+ * @size_max:	OUT - the maximum size of the memory window
+ *
+ * The peer memory window parameters have already been determined, so just
+ * return the corresponding values, which mustn't change within session.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static int idt_ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int widx,
+				resource_size_t *addr_align,
+				resource_size_t *size_align,
+				resource_size_t *size_max)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	const struct idt_mw_cfg *mw;
+
+	/* Both indexes must address an existing peer and one of its MWs */
+	if (pidx < 0 || pidx >= ndev->peer_cnt)
+		return -EINVAL;
+
+	if (widx < 0 || widx >= ndev->peers[pidx].mw_cnt)
+		return -EINVAL;
+
+	mw = &ndev->peers[pidx].mws[widx];
+
+	/* Each output is optional, fill in only what was requested */
+	if (addr_align)
+		*addr_align = mw->addr_align;
+	if (size_align)
+		*size_align = mw->size_align;
+	if (size_max)
+		*size_max = mw->size_max;
+
+	return 0;
+}
+
+/*
+ * idt_ntb_peer_mw_count() - number of outbound memory windows
+ *			     (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * Outbound memory windows parameters have been determined based on the
+ * BAR setup registers value, which are mostly constants within one session.
+ *
+ * Return: the number of memory windows.
+ */
+static int idt_ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+	const struct idt_ntb_dev *local = to_ndev_ntb(ntb);
+
+	return local->mw_cnt;
+}
+
+/*
+ * idt_ntb_peer_mw_get_addr() - get map address of an outbound memory window
+ *				(NTB API callback)
+ * @ntb:	NTB device context.
+ * @widx:	Memory window index (within ntb_peer_mw_count() return value).
+ * @base:	OUT - the base address of mapping region.
+ * @size:	OUT - the size of mapping region.
+ *
+ * Return just parameters of BAR resources mapping. Size reflects just the size
+ * of the resource
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static int idt_ntb_peer_mw_get_addr(struct ntb_dev *ntb, int widx,
+				    phys_addr_t *base, resource_size_t *size)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	const struct idt_mw_cfg *mw;
+
+	if (widx < 0 || widx >= ndev->mw_cnt)
+		return -EINVAL;
+
+	/* Entry idx sits idx * size_max bytes past the BAR resource start */
+	mw = &ndev->mws[widx];
+	if (base)
+		*base = pci_resource_start(ntb->pdev, mw->bar) +
+			mw->idx * mw->size_max;
+	if (size)
+		*size = mw->size_max;
+
+	return 0;
+}
+
+/*
+ * idt_ntb_peer_mw_set_trans() - set a translation address of a memory window
+ *				 (NTB API callback)
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device the translation address received from.
+ * @widx:	Memory window index.
+ * @addr:	The dma address of the shared memory to access.
+ * @size:	The size of the shared memory to access.
+ *
+ * Direct address translation and LUT based translation are initialized a
+ * bit differently, although the parameters restrictions are checked by
+ * the same code.
+ *
+ * Return: Zero on success, -EINVAL if an index, address or size is invalid.
+ */
+static int idt_ntb_peer_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
+				     u64 addr, resource_size_t size)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	struct idt_mw_cfg *mw_cfg;
+	u32 data = 0, lutoff = 0;
+
+	if (pidx < 0 || ndev->peer_cnt <= pidx)
+		return -EINVAL;
+
+	if (widx < 0 || ndev->mw_cnt <= widx)
+		return -EINVAL;
+
+	/*
+	 * Retrieve the memory window config to make sure the passed arguments
+	 * fit its restrictions
+	 */
+	mw_cfg = &ndev->mws[widx];
+	if (!IS_ALIGNED(addr, mw_cfg->addr_align))
+		return -EINVAL;
+	if (!IS_ALIGNED(size, mw_cfg->size_align) || size > mw_cfg->size_max)
+		return -EINVAL;
+
+	/* DIR and LUT based translations are initialized differently */
+	if (mw_cfg->type == IDT_MW_DIR) {
+		const struct idt_ntb_bar *bar = &ntdata_tbl.bars[mw_cfg->bar];
+		u64 limit;
+		/* Set destination partition of translation */
+		data = idt_nt_read(ndev, bar->setup);
+		data = SET_FIELD(BARSETUP_TPART, data, ndev->peers[pidx].part);
+		idt_nt_write(ndev, bar->setup, data);
+		/* Set translation base address */
+		idt_nt_write(ndev, bar->ltbase, (u32)addr);
+		idt_nt_write(ndev, bar->utbase, (u32)(addr >> 32));
+		/* Limit is the BAR resource start plus the requested size */
+		limit = pci_resource_start(ntb->pdev, mw_cfg->bar) + size;
+		idt_nt_write(ndev, bar->limit, (u32)limit);
+		if (IS_FLD_SET(BARSETUP_TYPE, data, 64))
+			idt_nt_write(ndev, (bar + 1)->limit, (limit >> 32));
+	} else {
+		unsigned long irqflags;
+		/* Initialize corresponding LUT entry */
+		lutoff = SET_FIELD(LUTOFFSET_INDEX, 0, mw_cfg->idx) |
+			 SET_FIELD(LUTOFFSET_BAR, 0, mw_cfg->bar);
+		data = SET_FIELD(LUTUDATA_PART, 0, ndev->peers[pidx].part) |
+			IDT_LUTUDATA_VALID;
+		spin_lock_irqsave(&ndev->lut_lock, irqflags);
+		idt_nt_write(ndev, IDT_NT_LUTOFFSET, lutoff);
+		idt_nt_write(ndev, IDT_NT_LUTLDATA, (u32)addr);
+		idt_nt_write(ndev, IDT_NT_LUTMDATA, (u32)(addr >> 32));
+		idt_nt_write(ndev, IDT_NT_LUTUDATA, data);
+		mmiowb();
+		spin_unlock_irqrestore(&ndev->lut_lock, irqflags);
+		/* Limit address isn't specified since size is fixed for LUT */
+	}
+
+	return 0;
+}
+
+/*
+ * idt_ntb_peer_mw_clear_trans() - clear the outbound MW translation address
+ *				   (NTB API callback)
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device (only range-checked here).
+ * @widx:	Memory window index.
+ *
+ * It effectively disables the translation over the specified outbound MW.
+ *
+ * Return: Zero on success, -EINVAL if either index is out of range.
+ */
+static int idt_ntb_peer_mw_clear_trans(struct ntb_dev *ntb, int pidx,
+					int widx)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	struct idt_mw_cfg *mw_cfg;
+
+	if (pidx < 0 || ndev->peer_cnt <= pidx)
+		return -EINVAL;
+
+	if (widx < 0 || ndev->mw_cnt <= widx)
+		return -EINVAL;
+
+	mw_cfg = &ndev->mws[widx];
+
+	/* DIR and LUT based translations are cleared differently */
+	if (mw_cfg->type == IDT_MW_DIR) {
+		const struct idt_ntb_bar *bar = &ntdata_tbl.bars[mw_cfg->bar];
+		u32 data;
+		/* Read BARSETUP to check BAR type */
+		data = idt_nt_read(ndev, bar->setup);
+		/* Disable translation by specifying zero BAR limit */
+		idt_nt_write(ndev, bar->limit, 0);
+		if (IS_FLD_SET(BARSETUP_TYPE, data, 64))
+			idt_nt_write(ndev, (bar + 1)->limit, 0);
+	} else {
+		unsigned long irqflags;
+		u32 lutoff;
+		/* Zero all three data registers of the selected LUT entry */
+		lutoff = SET_FIELD(LUTOFFSET_INDEX, 0, mw_cfg->idx) |
+			 SET_FIELD(LUTOFFSET_BAR, 0, mw_cfg->bar);
+		spin_lock_irqsave(&ndev->lut_lock, irqflags);
+		idt_nt_write(ndev, IDT_NT_LUTOFFSET, lutoff);
+		idt_nt_write(ndev, IDT_NT_LUTLDATA, 0);
+		idt_nt_write(ndev, IDT_NT_LUTMDATA, 0);
+		idt_nt_write(ndev, IDT_NT_LUTUDATA, 0);
+		mmiowb();
+		spin_unlock_irqrestore(&ndev->lut_lock, irqflags);
+	}
+
+	return 0;
+}
+
+/*=============================================================================
+ *                          5. Doorbell operations
+ *
+ *    Doorbell functionality of IDT PCIe-switches is pretty unusual. First of
+ * all there is a global doorbell register whose state can be changed by any
+ * NT-function of the IDT device in accordance with global permissions. These
+ * permissions configs are not supported by NTB API, so it must be done by
+ * either BIOS or EEPROM settings. In the same way the state of the global
+ * doorbell is reflected to the NT-functions local inbound doorbell registers.
+ * It can lead to situations when client driver sets some peer doorbell bits
+ * and get them bounced back to local inbound doorbell if permissions are
+ * granted.
+ *    Secondly there is just one IRQ vector for Doorbell, Message, Temperature
+ * and Switch events, so if client driver left any of Doorbell bits set and
+ * some other event occurred, the driver will be notified of Doorbell event
+ * again.
+ *=============================================================================
+ */
+
+/*
+ * idt_db_isr() - doorbell event ISR
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @ntint_sts:	NT-function interrupt status (only printed to debug log)
+ *
+ * Doorbell event happens when DBELL bit of NTINTSTS switches from 0 to 1.
+ * It happens only when unmasked doorbell bits are set to ones on completely
+ * zeroed doorbell register.
+ * The method is called from PCIe ISR bottom-half routine.
+ */
+static void idt_db_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
+{
+	/*
+	 * Doorbell IRQ status will be cleaned only when client
+	 * driver unsets all the doorbell bits.
+	 */
+	dev_dbg(&ndev->ntb.pdev->dev, "DB IRQ detected %#08x", ntint_sts);
+
+	/* Notify the client driver of possible doorbell state change */
+	ntb_db_event(&ndev->ntb, 0);
+}
+
+/*
+ * idt_ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb
+ *			     (NTB API callback)
+ * @ntb:	NTB device context (not used).
+ *
+ * IDT PCIe-switches expose just one Doorbell register of DWORD size.
+ *
+ * Return: IDT_DBELL_MASK, the mask of doorbell bits supported by the ntb.
+ */
+static u64 idt_ntb_db_valid_mask(struct ntb_dev *ntb)
+{
+	return IDT_DBELL_MASK;
+}
+
+/*
+ * idt_ntb_db_read() - read the local doorbell register (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * There is just one inbound doorbell register of each NT-function, so
+ * this method returns its value.
+ *
+ * Return: The bits currently set in the local doorbell register.
+ */
+static u64 idt_ntb_db_read(struct ntb_dev *ntb)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return idt_nt_read(ndev, IDT_NT_INDBELLSTS);
+}
+
+/*
+ * idt_ntb_db_clear() - clear bits in the local doorbell register
+ *			(NTB API callback)
+ * @ntb:	NTB device context.
+ * @db_bits:	Doorbell bits to clear.
+ *
+ * Clear bits of inbound doorbell register by writing ones to it.
+ *
+ * NOTE! Invalid bits are always considered cleared so it's not an error
+ * to clear them over.
+ *
+ * Return: always zero as success.
+ */
+static int idt_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+	u32 clr_bits = (u32)db_bits;
+
+	/* Ones written to the inbound doorbell status clear the bits */
+	idt_nt_write(to_ndev_ntb(ntb), IDT_NT_INDBELLSTS, clr_bits);
+	return 0;
+}
+
+/*
+ * idt_ntb_db_read_mask() - read the local doorbell mask (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * Each inbound doorbell bit can be masked from generating IRQ by setting
+ * the corresponding bit in inbound doorbell mask. So this method returns
+ * the value of the register.
+ *
+ * Return: The bits currently set in the local doorbell mask register.
+ */
+static u64 idt_ntb_db_read_mask(struct ntb_dev *ntb)
+{
+	u64 mask = idt_nt_read(to_ndev_ntb(ntb), IDT_NT_INDBELLMSK);
+
+	return mask;
+}
+
+/*
+ * idt_ntb_db_set_mask() - set bits in the local doorbell mask
+ *			   (NTB API callback)
+ * @ntb:	NTB device context.
+ * @db_bits:	Doorbell mask bits to set.
+ *
+ * The inbound doorbell register mask value must be read, then OR'ed with
+ * passed field and only then set back. It's left to idt_reg_set_bits().
+ *
+ * Return: zero on success, negative error if invalid argument passed.
+ */
+static int idt_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return idt_reg_set_bits(ndev, IDT_NT_INDBELLMSK, &ndev->db_mask_lock,
+				IDT_DBELL_MASK, db_bits);
+}
+
+/*
+ * idt_ntb_db_clear_mask() - clear bits in the local doorbell mask
+ *			     (NTB API callback)
+ * @ntb:	NTB device context.
+ * @db_bits:	Doorbell mask bits to clear.
+ *
+ * The method just clears the mask bits in accordance with the passed
+ * bitfield. IDT PCIe-switch shall generate an interrupt if there hasn't
+ * been any unmasked bit set before current unmasking. Otherwise IRQ won't
+ * be generated since there is only one IRQ vector for all doorbells.
+ *
+ * Return: always zero as success
+ */
+static int idt_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	idt_reg_clear_bits(ndev, IDT_NT_INDBELLMSK, &ndev->db_mask_lock,
+			   db_bits);
+
+	return 0;
+}
+
+/*
+ * idt_ntb_peer_db_set() - set bits in the peer doorbell register
+ *			   (NTB API callback)
+ * @ntb:	NTB device context.
+ * @db_bits:	Doorbell bits to set.
+ *
+ * IDT PCIe-switches exposes local outbound doorbell register to change peer
+ * inbound doorbell register state.
+ *
+ * Return: zero on success, negative error if invalid argument passed.
+ */
+static int idt_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+	const u64 invalid = db_bits & ~(u64)IDT_DBELL_MASK;
+
+	if (invalid != 0)
+		return -EINVAL;
+
+	idt_nt_write(to_ndev_ntb(ntb), IDT_NT_OUTDBELLSET, (u32)db_bits);
+	return 0;
+}
+
+/*=============================================================================
+ *                          6. Messaging operations
+ *
+ *    Each NT-function of IDT PCIe-switch has four inbound and four outbound
+ * message registers. Each outbound message register can be connected to one or
+ * even more than one peer inbound message registers by setting global
+ * configurations. Since NTB API permits one-on-one message registers mapping
+ * only, the driver acts in accordance with that restriction.
+ *=============================================================================
+ */
+
+/*
+ * idt_init_msg() - initialize messaging interface
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Just initialize the message registers routing tables locker.
+ */
+static void idt_init_msg(struct idt_ntb_dev *ndev)
+{
+	unsigned char reg;
+
+	/* Every message register has a spin lock of its own */
+	for (reg = 0; reg < IDT_MSG_CNT; reg++)
+		spin_lock_init(&ndev->msg_locks[reg]);
+
+	dev_dbg(&ndev->ntb.pdev->dev, "NTB Messaging initialized");
+}
+
+/*
+ * idt_msg_isr() - message event ISR
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @ntint_sts:	NT-function interrupt status (only printed to debug log)
+ *
+ * Message event happens when MSG bit of NTINTSTS switches from 0 to 1.
+ * It happens only when unmasked message status bits are set to ones on
+ * completely zeroed message status register.
+ * The method is called from PCIe ISR bottom-half routine.
+ */
+static void idt_msg_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
+{
+	/*
+	 * Message IRQ status will be cleaned only when client
+	 * driver unsets all the message status bits.
+	 */
+	dev_dbg(&ndev->ntb.pdev->dev, "Message IRQ detected %#08x", ntint_sts);
+
+	/* Notify the client driver of possible message status change */
+	ntb_msg_event(&ndev->ntb);
+}
+
+/*
+ * idt_ntb_msg_count() - get the number of message registers (NTB API callback)
+ * @ntb:	NTB device context (not used).
+ *
+ * IDT PCIe-switches support four message registers.
+ *
+ * Return: IDT_MSG_CNT, the number of message registers.
+ */
+static int idt_ntb_msg_count(struct ntb_dev *ntb)
+{
+	return IDT_MSG_CNT;
+}
+
+/*
+ * idt_ntb_msg_inbits() - get a bitfield of inbound message registers status
+ *			  (NTB API callback)
+ * @ntb:	NTB device context (not used).
+ *
+ * NT message status register is shared between inbound and outbound message
+ * registers status, this mask selects the inbound part of it.
+ *
+ * Return: IDT_INMSG_MASK, bitfield of inbound message registers.
+ */
+static u64 idt_ntb_msg_inbits(struct ntb_dev *ntb)
+{
+	return (u64)IDT_INMSG_MASK;
+}
+
+/*
+ * idt_ntb_msg_outbits() - get a bitfield of outbound message registers status
+ *			   (NTB API callback)
+ * @ntb:	NTB device context (not used).
+ *
+ * NT message status register is shared between inbound and outbound message
+ * registers status, this mask selects the outbound part of it.
+ *
+ * Return: IDT_OUTMSG_MASK, bitfield of outbound message registers.
+ */
+static u64 idt_ntb_msg_outbits(struct ntb_dev *ntb)
+{
+	return (u64)IDT_OUTMSG_MASK;
+}
+
+/*
+ * idt_ntb_msg_read_sts() - read the message registers status (NTB API callback)
+ * @ntb:	NTB device context.
+ *
+ * IDT PCIe-switches expose message status registers to notify drivers of
+ * incoming data and failures in case if peer message register isn't freed.
+ *
+ * Return: status bits of message registers
+ */
+static u64 idt_ntb_msg_read_sts(struct ntb_dev *ntb)
+{
+	u64 sts = idt_nt_read(to_ndev_ntb(ntb), IDT_NT_MSGSTS);
+
+	return sts;
+}
+
+/*
+ * idt_ntb_msg_clear_sts() - clear status bits of message registers
+ *			     (NTB API callback)
+ * @ntb:	NTB device context.
+ * @sts_bits:	Status bits to clear.
+ *
+ * Clear bits in the status register by writing ones.
+ *
+ * NOTE! Invalid bits are always considered cleared so it's not an error
+ * to clear them over.
+ *
+ * Return: always zero as success.
+ */
+static int idt_ntb_msg_clear_sts(struct ntb_dev *ntb, u64 sts_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	/* Narrow explicitly, the same way idt_ntb_db_clear() does */
+	idt_nt_write(ndev, IDT_NT_MSGSTS, (u32)sts_bits);
+	return 0;
+}
+
+/*
+ * idt_ntb_msg_set_mask() - set mask of message register status bits
+ *			    (NTB API callback)
+ * @ntb:	NTB device context.
+ * @mask_bits:	Mask bits to set.
+ *
+ * Mask the message status bits from raising an IRQ.
+ *
+ * Return: zero on success, negative error if invalid argument passed.
+ */
+static int idt_ntb_msg_set_mask(struct ntb_dev *ntb, u64 mask_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	return idt_reg_set_bits(ndev, IDT_NT_MSGSTSMSK, &ndev->msg_mask_lock,
+				IDT_MSG_MASK, mask_bits);
+}
+
+/*
+ * idt_ntb_msg_clear_mask() - clear message registers mask
+ *			      (NTB API callback)
+ * @ntb:	NTB device context.
+ * @mask_bits:	Mask bits to clear.
+ *
+ * Clear mask of message status bits IRQs.
+ *
+ * Return: always zero as success.
+ */
+static int idt_ntb_msg_clear_mask(struct ntb_dev *ntb, u64 mask_bits)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+
+	idt_reg_clear_bits(ndev, IDT_NT_MSGSTSMSK, &ndev->msg_mask_lock,
+			   mask_bits);
+
+	return 0;
+}
+
+/*
+ * idt_ntb_msg_read() - read message register with specified index
+ *			(NTB API callback)
+ * @ntb:	NTB device context.
+ * @midx:	Message register index
+ * @pidx:	OUT - Port index of peer device a message retrieved from
+ * @msg:	OUT - Data
+ *
+ * Read data from the specified message register and source register.
+ *
+ * Return: zero on success, negative error if invalid argument passed.
+ */
+static int idt_ntb_msg_read(struct ntb_dev *ntb, int midx, int *pidx, u32 *msg)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	int src_idx;
+
+	if (midx < 0 || midx >= IDT_MSG_CNT)
+		return -EINVAL;
+
+	/* Translate the source partition of the message to a peer index */
+	if (pidx) {
+		u32 srcpart;
+
+		srcpart = idt_nt_read(ndev, ntdata_tbl.msgs[midx].src);
+		src_idx = ndev->part_idx_map[srcpart];
+
+		/* Map entries holding -EINVAL are reported as peer 0 */
+		*pidx = (src_idx == -EINVAL) ? 0 : src_idx;
+	}
+
+	/* Fetch the payload of the inbound message register */
+	if (msg)
+		*msg = idt_nt_read(ndev, ntdata_tbl.msgs[midx].in);
+
+	return 0;
+}
+
+/*
+ * idt_ntb_msg_write() - write data to the specified message register
+ *			 (NTB API callback)
+ * @ntb:	NTB device context.
+ * @midx:	Message register index
+ * @pidx:	Port index of peer device a message being sent to
+ * @msg:	Data to send
+ *
+ * Just try to send data to a peer. Message status register should be
+ * checked by client driver.
+ *
+ * Return: zero on success, negative error if invalid argument passed.
+ */
+static int idt_ntb_msg_write(struct ntb_dev *ntb, int midx, int pidx, u32 msg)
+{
+	struct idt_ntb_dev *ndev = to_ndev_ntb(ntb);
+	unsigned long flags;
+	u32 route;
+
+	if (midx < 0 || midx >= IDT_MSG_CNT)
+		return -EINVAL;
+	if (pidx < 0 || pidx >= ndev->peer_cnt)
+		return -EINVAL;
+
+	/* Route is the target register index plus the peer partition */
+	route = SET_FIELD(SWPxMSGCTL_REG, 0, midx) |
+		SET_FIELD(SWPxMSGCTL_PART, 0, ndev->peers[pidx].part);
+
+	/*
+	 * The route and the data are written as a pair while holding
+	 * the lock of this message register
+	 */
+	spin_lock_irqsave(&ndev->msg_locks[midx], flags);
+	idt_sw_write(ndev, partdata_tbl[ndev->part].msgctl[midx], route);
+	idt_nt_write(ndev, ntdata_tbl.msgs[midx].out, msg);
+	mmiowb();
+	spin_unlock_irqrestore(&ndev->msg_locks[midx], flags);
+
+	/* Delivery isn't verified here, the client checks the status */
+	return 0;
+}
+
+/*=============================================================================
+ *                      7. Temperature sensor operations
+ *
+ *    IDT PCIe-switch has an embedded temperature sensor, which can be used to
+ * warn a user-space of possible chip overheating. Since workload temperature
+ * can be different on different platforms, temperature thresholds as well as
+ * general sensor settings must be setup in the framework of BIOS/EEPROM
+ * initializations. It includes the actual sensor enabling as well.
+ *=============================================================================
+ */
+
+/*
+ * idt_read_temp() - read temperature from chip sensor
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @val:	OUT - integer value of temperature
+ * @frac:	OUT - fraction (one decimal digit, either 0 or 5)
+ */
+static void idt_read_temp(struct idt_ntb_dev *ndev, unsigned char *val,
+			  unsigned char *frac)
+{
+	u32 data;
+
+	/* Read the data from TEMP field of the TMPSTS register */
+	data = idt_sw_read(ndev, IDT_SW_TMPSTS);
+	data = GET_FIELD(TMPSTS_TEMP, data);
+	/* TEMP field has one fractional bit and seven integer bits */
+	*val = data >> 1;
+	*frac = ((data & 0x1) ? 5 : 0);
+}
+
+/*
+ * idt_temp_isr() - temperature sensor alarm events ISR
+ * @ndev:	IDT NTB hardware driver descriptor
+ * @ntint_sts:	NT-function interrupt status
+ *
+ * It handles events of temperature crossing alarm thresholds. Since reading
+ * of TMPALARM register clears it up, the function doesn't analyze the alarm
+ * value, instead the current temperature value is just printed to the log
+ * as a warning.
+ * The method is called from PCIe ISR bottom-half routine.
+ */
+static void idt_temp_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
+{
+	unsigned char val, frac;
+
+	/* Read the current temperature value */
+	idt_read_temp(ndev, &val, &frac);
+
+	/* NOTE(review): TMPALARM read is disabled, confirm that's intended */
+	/*(void)idt_sw_read(ndev, IDT_SW_TMPALARM);*/
+
+	/* Clean the corresponding interrupt bit */
+	idt_nt_write(ndev, IDT_NT_NTINTSTS, IDT_NTINTSTS_TMPSENSOR);
+
+	dev_dbg(&ndev->ntb.pdev->dev,
+		"Temp sensor IRQ detected %#08x", ntint_sts);
+
+	/* Print temperature value to log */
+	dev_warn(&ndev->ntb.pdev->dev, "Temperature %hhu.%hhu", val, frac);
+}
+
+/*=============================================================================
+ *                           8. ISRs related operations
+ *
+ *    IDT PCIe-switch has strangely developed IRQ system. There is just one
+ * interrupt vector for doorbell and message registers. So the hardware driver
+ * can't determine actual source of IRQ if, for example, message event happened
+ * while any of unmasked doorbell is still set. The similar situation may be if
+ * switch or temperature sensor events pop up. The difference is that SEVENT
+ * and TMPSENSOR bits of NT interrupt status register can be cleaned by
+ * IRQ handler so a next interrupt request won't have false handling of
+ * corresponding events.
+ *    The hardware driver has only bottom-half handler of the IRQ, since if any
+ * of events happened the device won't raise it again before the last one is
+ * handled by clearing of corresponding NTINTSTS bit.
+ *=============================================================================
+ */
+
+static irqreturn_t idt_thread_isr(int irq, void *devid);
+
+/*
+ * idt_init_isr() - set up the PCIe interrupt handling
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * A single MSI or legacy vector is allocated, the threaded handler is
+ * attached to it and the NT interrupt sources covered by IDT_NTINTMSK_ALL
+ * are unmasked afterwards.
+ *
+ * Return: zero on success, otherwise a negative error number.
+ */
+static int idt_init_isr(struct idt_ntb_dev *ndev)
+{
+	struct pci_dev *pdev = ndev->ntb.pdev;
+	struct device *dev = &pdev->dev;
+	u32 mask;
+	int irq, ret;
+
+	/* The driver needs exactly one interrupt vector */
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_LEGACY);
+	if (ret != 1) {
+		dev_err(dev, "Failed to allocate IRQ vector");
+		return ret;
+	}
+
+	/* Find out the IRQ number of that vector */
+	irq = pci_irq_vector(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "Failed to get IRQ vector");
+		ret = irq;
+		goto err_free_vectors;
+	}
+
+	/* Only the threaded part of the handler is installed */
+	ret = devm_request_threaded_irq(dev, irq, NULL, idt_thread_isr,
+					IRQF_ONESHOT, NTB_IRQNAME, ndev);
+	if (ret) {
+		dev_err(dev, "Failed to set MSI IRQ handler, %d", ret);
+		goto err_free_vectors;
+	}
+
+	/* Clear the Message/Doorbell/SE/Temperature bits of the mask */
+	mask = idt_nt_read(ndev, IDT_NT_NTINTMSK);
+	mask &= ~IDT_NTINTMSK_ALL;
+	idt_nt_write(ndev, IDT_NT_NTINTMSK, mask);
+
+	/* Interrupts are enabled starting from this point */
+	dev_dbg(dev, "NTB interrupts initialized");
+
+	return 0;
+
+err_free_vectors:
+	pci_free_irq_vectors(pdev);
+
+	return ret;
+}
+
+
+/*
+ * idt_deinit_isr() - tear down the PCIe interrupt handling
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Masks the NT interrupts back, releases the IRQ handler and frees the
+ * allocated IRQ vectors.
+ */
+static void idt_deinit_isr(struct idt_ntb_dev *ndev)
+{
+	struct pci_dev *pdev = ndev->ntb.pdev;
+	u32 mask;
+
+	/* Set all the NT interrupt mask bits again */
+	mask = idt_nt_read(ndev, IDT_NT_NTINTMSK);
+	mask |= IDT_NTINTMSK_ALL;
+	idt_nt_write(ndev, IDT_NT_NTINTMSK, mask);
+
+	/*
+	 * The managed IRQ is released by hand first, otherwise freeing of the
+	 * PCI IRQ vectors will fail
+	 */
+	devm_free_irq(&pdev->dev, pci_irq_vector(pdev, 0), ndev);
+	pci_free_irq_vectors(pdev);
+
+	dev_dbg(&pdev->dev, "NTB interrupts deinitialized");
+}
+
+/*
+ * idt_thread_isr() - NT function interrupts handler
+ * @irq:	IRQ number
+ * @devid:	Custom buffer (the IDT NTB hardware driver descriptor)
+ *
+ * The NT interrupt status register is read once and every event source
+ * flagged in it gets its sub-handler called.
+ * The method is bottom-half routine of actual default PCIe IRQ handler.
+ *
+ * Return: IRQ_HANDLED if at least one event was dispatched, IRQ_NONE
+ * otherwise.
+ */
+static irqreturn_t idt_thread_isr(int irq, void *devid)
+{
+	struct idt_ntb_dev *ndev = devid;
+	irqreturn_t status = IRQ_NONE;
+	u32 ntint_sts;
+
+	/* Snapshot of the NT interrupts status register */
+	ntint_sts = idt_nt_read(ndev, IDT_NT_NTINTSTS);
+
+	/* Messaging events */
+	if (ntint_sts & IDT_NTINTSTS_MSG) {
+		idt_msg_isr(ndev, ntint_sts);
+		status = IRQ_HANDLED;
+	}
+
+	/* Doorbell events */
+	if (ntint_sts & IDT_NTINTSTS_DBELL) {
+		idt_db_isr(ndev, ntint_sts);
+		status = IRQ_HANDLED;
+	}
+
+	/* Switch events */
+	if (ntint_sts & IDT_NTINTSTS_SEVENT) {
+		idt_se_isr(ndev, ntint_sts);
+		status = IRQ_HANDLED;
+	}
+
+	/* Temperature sensor events */
+	if (ntint_sts & IDT_NTINTSTS_TMPSENSOR) {
+		idt_temp_isr(ndev, ntint_sts);
+		status = IRQ_HANDLED;
+	}
+
+	dev_dbg(&ndev->ntb.pdev->dev, "IDT IRQs 0x%08x handled", ntint_sts);
+
+	return status;
+}
+
+/*===========================================================================
+ *                     9. NTB hardware driver initialization
+ *===========================================================================
+ */
+
+/*
+ * NTB API operations
+ *
+ * Table of the driver callbacks (port, link, memory window, doorbell and
+ * message operations). idt_register_device() assigns it to the ops field of
+ * the NTB device before registering the device.
+ */
+static const struct ntb_dev_ops idt_ntb_ops = {
+	.port_number		= idt_ntb_port_number,
+	.peer_port_count	= idt_ntb_peer_port_count,
+	.peer_port_number	= idt_ntb_peer_port_number,
+	.peer_port_idx		= idt_ntb_peer_port_idx,
+	.link_is_up		= idt_ntb_link_is_up,
+	.link_enable		= idt_ntb_link_enable,
+	.link_disable		= idt_ntb_link_disable,
+	.mw_count		= idt_ntb_mw_count,
+	.mw_get_align		= idt_ntb_mw_get_align,
+	.peer_mw_count		= idt_ntb_peer_mw_count,
+	.peer_mw_get_addr	= idt_ntb_peer_mw_get_addr,
+	.peer_mw_set_trans	= idt_ntb_peer_mw_set_trans,
+	.peer_mw_clear_trans	= idt_ntb_peer_mw_clear_trans,
+	.db_valid_mask		= idt_ntb_db_valid_mask,
+	.db_read		= idt_ntb_db_read,
+	.db_clear		= idt_ntb_db_clear,
+	.db_read_mask		= idt_ntb_db_read_mask,
+	.db_set_mask		= idt_ntb_db_set_mask,
+	.db_clear_mask		= idt_ntb_db_clear_mask,
+	.peer_db_set		= idt_ntb_peer_db_set,
+	.msg_count		= idt_ntb_msg_count,
+	.msg_inbits		= idt_ntb_msg_inbits,
+	.msg_outbits		= idt_ntb_msg_outbits,
+	.msg_read_sts		= idt_ntb_msg_read_sts,
+	.msg_clear_sts		= idt_ntb_msg_clear_sts,
+	.msg_set_mask		= idt_ntb_msg_set_mask,
+	.msg_clear_mask		= idt_ntb_msg_clear_mask,
+	.msg_read		= idt_ntb_msg_read,
+	.msg_write		= idt_ntb_msg_write
+};
+
+/*
+ * idt_register_device() - register IDT NTB device on the NTB bus
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Return: zero on success, otherwise a negative error number.
+ */
+static int idt_register_device(struct idt_ntb_dev *ndev)
+{
+	struct device *dev = &ndev->ntb.pdev->dev;
+	int ret;
+
+	/* Fill in the remaining NTB device fields before the registration */
+	ndev->ntb.ops = &idt_ntb_ops;
+	ndev->ntb.topo = NTB_TOPO_PRI;
+
+	ret = ntb_register_device(&ndev->ntb);
+	if (ret) {
+		dev_err(dev, "Failed to register NTB device");
+		return ret;
+	}
+
+	dev_dbg(dev, "NTB device successfully registered");
+
+	return 0;
+}
+
+/*
+ * idt_unregister_device() - remove IDT NTB device from the NTB bus
+ * @ndev:	IDT NTB hardware driver descriptor
+ */
+static void idt_unregister_device(struct idt_ntb_dev *ndev)
+{
+	struct device *dev = &ndev->ntb.pdev->dev;
+
+	/* Nothing but the NTB device unregistration is needed */
+	ntb_unregister_device(&ndev->ntb);
+
+	dev_dbg(dev, "NTB device unregistered");
+}
+
+/*=============================================================================
+ *                        10. DebugFS node initialization
+ *=============================================================================
+ */
+
+static ssize_t idt_dbgfs_info_read(struct file *filp, char __user *ubuf,
+				   size_t count, loff_t *offp);
+
+/*
+ * Driver DebugFS info file operations
+ *
+ * The node is read-only from the driver's point of view: only open and read
+ * callbacks are provided. idt_init_dbgfs() passes this table to
+ * debugfs_create_file() when creating the info node.
+ */
+static const struct file_operations idt_dbgfs_info_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = idt_dbgfs_info_read
+};
+
+/*
+ * idt_dbgfs_info_read() - DebugFS read info node callback
+ * @filp:	File node descriptor (private_data holds the driver descriptor)
+ * @ubuf:	User-space buffer to put data to
+ * @count:	Size of the buffer
+ * @offp:	Offset within the buffer
+ *
+ * Formats a textual report (local port and partition, peers, link status,
+ * mapping table, memory windows, doorbell and message state, temperature)
+ * into a temporary kernel buffer of at most min(@count, 0x1000) bytes and
+ * copies it to the user-space buffer.
+ *
+ * Return: the value returned by simple_read_from_buffer(), or -ENOMEM if
+ * the temporary buffer can't be allocated.
+ */
+static ssize_t idt_dbgfs_info_read(struct file *filp, char __user *ubuf,
+				   size_t count, loff_t *offp)
+{
+	struct idt_ntb_dev *ndev = filp->private_data;
+	unsigned char temp, frac, idx, pidx, cnt;
+	ssize_t ret = 0, off = 0;
+	unsigned long irqflags;
+	enum ntb_speed speed;
+	enum ntb_width width;
+	char *strbuf;
+	size_t size;
+	u32 data;
+
+	/* Let's limit the buffer size the way the Intel/AMD drivers do */
+	size = min_t(size_t, count, 0x1000U);
+
+	/* Allocate the memory for the buffer */
+	strbuf = kmalloc(size, GFP_KERNEL);
+	if (strbuf == NULL)
+		return -ENOMEM;
+
+	/* Put the data into the string buffer */
+	off += scnprintf(strbuf + off, size - off,
+		"\n\t\tIDT NTB device Information:\n\n");
+
+	/* General local device configurations */
+	off += scnprintf(strbuf + off, size - off,
+		"Local Port %hhu, Partition %hhu\n", ndev->port, ndev->part);
+
+	/* Peer ports information */
+	off += scnprintf(strbuf + off, size - off, "Peers:\n");
+	for (idx = 0; idx < ndev->peer_cnt; idx++) {
+		off += scnprintf(strbuf + off, size - off,
+			"\t%hhu. Port %hhu, Partition %hhu\n",
+			idx, ndev->peers[idx].port, ndev->peers[idx].part);
+	}
+
+	/* Links status */
+	data = idt_ntb_link_is_up(&ndev->ntb, &speed, &width);
+	off += scnprintf(strbuf + off, size - off,
+		"NTB link status\t- 0x%08x, ", data);
+	off += scnprintf(strbuf + off, size - off, "PCIe Gen %d x%d lanes\n",
+		speed, width);
+
+	/*
+	 * Mapping table entries. An entry is selected by writing its index
+	 * to NTMTBLADDR and then fetched from NTMTBLDATA, so the write/read
+	 * pair is done with mtbl_lock held.
+	 */
+	off += scnprintf(strbuf + off, size - off, "NTB Mapping Table:\n");
+	for (idx = 0; idx < IDT_MTBL_ENTRY_CNT; idx++) {
+		spin_lock_irqsave(&ndev->mtbl_lock, irqflags);
+		idt_nt_write(ndev, IDT_NT_NTMTBLADDR, idx);
+		data = idt_nt_read(ndev, IDT_NT_NTMTBLDATA);
+		spin_unlock_irqrestore(&ndev->mtbl_lock, irqflags);
+
+		/* Print valid entries only */
+		if (data & IDT_NTMTBLDATA_VALID) {
+			off += scnprintf(strbuf + off, size - off,
+				"\t%hhu. Partition %d, Requester ID 0x%04x\n",
+				idx, GET_FIELD(NTMTBLDATA_PART, data),
+				GET_FIELD(NTMTBLDATA_REQID, data));
+		}
+	}
+	off += scnprintf(strbuf + off, size - off, "\n");
+
+	/*
+	 * Outbound memory windows information. The index is advanced by the
+	 * count idt_get_mw_count() returns for the window type, so one line
+	 * may describe a range of windows.
+	 */
+	off += scnprintf(strbuf + off, size - off,
+		"Outbound Memory Windows:\n");
+	for (idx = 0; idx < ndev->mw_cnt; idx += cnt) {
+		data = ndev->mws[idx].type;
+		cnt = idt_get_mw_count(data);
+
+		/* Print Memory Window information */
+		if (data == IDT_MW_DIR)
+			off += scnprintf(strbuf + off, size - off,
+				"\t%hhu.\t", idx);
+		else
+			off += scnprintf(strbuf + off, size - off,
+				"\t%hhu-%hhu.\t", idx, idx + cnt - 1);
+
+		off += scnprintf(strbuf + off, size - off, "%s BAR%hhu, ",
+			idt_get_mw_name(data), ndev->mws[idx].bar);
+
+		off += scnprintf(strbuf + off, size - off,
+			"Address align 0x%08llx, ", ndev->mws[idx].addr_align);
+
+		off += scnprintf(strbuf + off, size - off,
+			"Size align 0x%08llx, Size max %llu\n",
+			ndev->mws[idx].size_align, ndev->mws[idx].size_max);
+	}
+
+	/* Inbound memory windows information (same layout, per each peer) */
+	for (pidx = 0; pidx < ndev->peer_cnt; pidx++) {
+		off += scnprintf(strbuf + off, size - off,
+			"Inbound Memory Windows for peer %hhu (Port %hhu):\n",
+			pidx, ndev->peers[pidx].port);
+
+		/* Print Memory Windows information */
+		for (idx = 0; idx < ndev->peers[pidx].mw_cnt; idx += cnt) {
+			data = ndev->peers[pidx].mws[idx].type;
+			cnt = idt_get_mw_count(data);
+
+			if (data == IDT_MW_DIR)
+				off += scnprintf(strbuf + off, size - off,
+					"\t%hhu.\t", idx);
+			else
+				off += scnprintf(strbuf + off, size - off,
+					"\t%hhu-%hhu.\t", idx, idx + cnt - 1);
+
+			off += scnprintf(strbuf + off, size - off,
+				"%s BAR%hhu, ", idt_get_mw_name(data),
+				ndev->peers[pidx].mws[idx].bar);
+
+			off += scnprintf(strbuf + off, size - off,
+				"Address align 0x%08llx, ",
+				ndev->peers[pidx].mws[idx].addr_align);
+
+			off += scnprintf(strbuf + off, size - off,
+				"Size align 0x%08llx, Size max %llu\n",
+				ndev->peers[pidx].mws[idx].size_align,
+				ndev->peers[pidx].mws[idx].size_max);
+		}
+	}
+	off += scnprintf(strbuf + off, size - off, "\n");
+
+	/* Doorbell information */
+	data = idt_sw_read(ndev, IDT_SW_GDBELLSTS);
+	off += scnprintf(strbuf + off, size - off,
+		 "Global Doorbell state\t- 0x%08x\n", data);
+	data = idt_ntb_db_read(&ndev->ntb);
+	off += scnprintf(strbuf + off, size - off,
+		 "Local  Doorbell state\t- 0x%08x\n", data);
+	data = idt_nt_read(ndev, IDT_NT_INDBELLMSK);
+	off += scnprintf(strbuf + off, size - off,
+		 "Local  Doorbell mask\t- 0x%08x\n", data);
+	off += scnprintf(strbuf + off, size - off, "\n");
+
+	/* Messaging information */
+	off += scnprintf(strbuf + off, size - off,
+		 "Message event valid\t- 0x%08x\n", IDT_MSG_MASK);
+	data = idt_ntb_msg_read_sts(&ndev->ntb);
+	off += scnprintf(strbuf + off, size - off,
+		 "Message event status\t- 0x%08x\n", data);
+	data = idt_nt_read(ndev, IDT_NT_MSGSTSMSK);
+	off += scnprintf(strbuf + off, size - off,
+		 "Message event mask\t- 0x%08x\n", data);
+	off += scnprintf(strbuf + off, size - off,
+		 "Message data:\n");
+	for (idx = 0; idx < IDT_MSG_CNT; idx++) {
+		int src;
+		(void)idt_ntb_msg_read(&ndev->ntb, idx, &src, &data);
+		off += scnprintf(strbuf + off, size - off,
+			"\t%hhu. 0x%08x from peer %hhu (Port %hhu)\n",
+			idx, data, src, ndev->peers[src].port);
+	}
+	off += scnprintf(strbuf + off, size - off, "\n");
+
+	/* Current temperature */
+	idt_read_temp(ndev, &temp, &frac);
+	off += scnprintf(strbuf + off, size - off,
+		"Switch temperature\t\t- %hhu.%hhuC\n", temp, frac);
+
+	/* Copy the buffer to the User Space and drop the temporary buffer */
+	ret = simple_read_from_buffer(ubuf, count, offp, strbuf, off);
+	kfree(strbuf);
+
+	return ret;
+}
+
+/*
+ * idt_init_dbgfs() - create the device info node in DebugFS
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * The node is named "info:<PCI device name>" and is placed in the driver
+ * top DebugFS directory.
+ *
+ * Return: zero on success, otherwise a negative error number.
+ */
+static int idt_init_dbgfs(struct idt_ntb_dev *ndev)
+{
+	struct device *dev = &ndev->ntb.pdev->dev;
+	char devname[64];
+
+	/* Nothing to do without the top directory */
+	if (IS_ERR_OR_NULL(dbgfs_topdir)) {
+		dev_info(dev, "Top DebugFS directory absent");
+		return PTR_ERR(dbgfs_topdir);
+	}
+
+	/* Compose the node name and create the info file */
+	snprintf(devname, sizeof(devname), "info:%s",
+		 pci_name(ndev->ntb.pdev));
+	ndev->dbgfs_info = debugfs_create_file(devname, 0400, dbgfs_topdir,
+					       ndev, &idt_dbgfs_info_ops);
+	if (IS_ERR(ndev->dbgfs_info)) {
+		dev_dbg(dev, "Failed to create DebugFS node");
+		return PTR_ERR(ndev->dbgfs_info);
+	}
+
+	dev_dbg(dev, "NTB device DebugFS node created");
+
+	return 0;
+}
+
+/*
+ * idt_deinit_dbgfs() - remove the device info node from DebugFS
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Only the info node of this device is discarded.
+ */
+static void idt_deinit_dbgfs(struct idt_ntb_dev *ndev)
+{
+	struct device *dev = &ndev->ntb.pdev->dev;
+
+	debugfs_remove(ndev->dbgfs_info);
+
+	dev_dbg(dev, "NTB device DebugFS node discarded");
+}
+
+/*=============================================================================
+ *                     11. Basic PCIe device initialization
+ *=============================================================================
+ */
+
+/*
+ * idt_check_setup() - Check whether the IDT PCIe-switch is properly
+ *		       pre-initialized
+ * @pdev:	Pointer to the PCI device descriptor
+ *
+ * BARSETUP0 is read from the PCI configuration space; BAR0 must be enabled,
+ * must be in the config-space mode and must have the expected size.
+ *
+ * Return: zero on success, otherwise a negative error number.
+ */
+static int idt_check_setup(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	u32 barsetup;
+	int ret;
+
+	/* Fetch the BARSETUP0 register */
+	ret = pci_read_config_dword(pdev, IDT_NT_BARSETUP0, &barsetup);
+	if (ret) {
+		dev_err(dev, "Failed to read BARSETUP0 config register");
+		return ret;
+	}
+
+	/* BAR0 has to be both enabled and set to map the config space */
+	if (!((barsetup & IDT_BARSETUP_EN) &&
+	      (barsetup & IDT_BARSETUP_MODE_CFG))) {
+		dev_err(dev, "BAR0 doesn't map config space");
+		return -EINVAL;
+	}
+
+	/* The size of the config space BAR0 is fixed */
+	if ((barsetup & IDT_BARSETUP_SIZE_MASK) != IDT_BARSETUP_SIZE_CFG) {
+		dev_err(dev, "Invalid size of config space");
+		return -EINVAL;
+	}
+
+	dev_dbg(dev, "NTB device pre-initialized correctly");
+
+	return 0;
+}
+
+/*
+ * idt_create_dev() - create the IDT PCIe-switch driver descriptor
+ * @pdev:	Pointer to the PCI device descriptor
+ * @id:		IDT PCIe-device configuration
+ *
+ * It just allocates a memory for IDT PCIe-switch device structure and
+ * initializes some commonly used fields.
+ *
+ * No need of release method, since managed device resource is used for
+ * memory allocation.
+ *
+ * Return: pointer to the descriptor on success, otherwise an ERR_PTR()
+ * encoded negative error number. NULL is never returned.
+ */
+static struct idt_ntb_dev *idt_create_dev(struct pci_dev *pdev,
+					  const struct pci_device_id *id)
+{
+	struct idt_ntb_dev *ndev;
+
+	/*
+	 * Allocate memory for the IDT PCIe-device descriptor. The allocator
+	 * reports a failure with NULL, not with an error pointer.
+	 */
+	ndev = devm_kzalloc(&pdev->dev, sizeof(*ndev), GFP_KERNEL);
+	if (!ndev) {
+		dev_err(&pdev->dev, "Memory allocation failed for descriptor");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* Save the IDT PCIe-switch ports configuration */
+	ndev->swcfg = (struct idt_89hpes_cfg *)id->driver_data;
+	/* Save the PCI-device pointer inside the NTB device structure */
+	ndev->ntb.pdev = pdev;
+
+	/* Initialize spin locker of Doorbell, Message and GASA registers */
+	spin_lock_init(&ndev->db_mask_lock);
+	spin_lock_init(&ndev->msg_mask_lock);
+	spin_lock_init(&ndev->gasa_lock);
+
+	dev_info(&pdev->dev, "IDT %s discovered", ndev->swcfg->name);
+
+	dev_dbg(&pdev->dev, "NTB device descriptor created");
+
+	return ndev;
+}
+
+/*
+ * idt_init_pci() - initialize the basic PCI-related subsystem
+ * @ndev:	Pointer to the IDT PCIe-switch driver descriptor
+ *
+ * The function sets the streaming and consistent DMA masks (64 bits with a
+ * fallback to 32 bits), tries to enable AER, enables the PCI device and the
+ * bus mastering, requests all BARs while mapping BAR0 only and finally
+ * stores @ndev as the PCI device private data.
+ *
+ * Managed device resources will be freed automatically in case of failure or
+ * driver detachment.
+ *
+ * Return: zero on success, otherwise negative error number.
+ */
+static int idt_init_pci(struct idt_ntb_dev *ndev)
+{
+	struct pci_dev *pdev = ndev->ntb.pdev;
+	int ret;
+
+	/* Initialize the bit mask of DMA: try 64 bits first, then 32 bits */
+	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (ret != 0) {
+		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (ret != 0) {
+			dev_err(&pdev->dev, "Failed to set DMA bit mask\n");
+			return ret;
+		}
+		dev_warn(&pdev->dev, "Cannot set DMA highmem bit mask\n");
+	}
+	ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (ret != 0) {
+		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (ret != 0) {
+			dev_err(&pdev->dev,
+				"Failed to set consistent DMA bit mask\n");
+			return ret;
+		}
+		dev_warn(&pdev->dev,
+			"Cannot set consistent DMA highmem bit mask\n");
+	}
+
+	/*
+	 * Enable the device advanced error reporting. It's not critical to
+	 * have AER disabled in the kernel, so a failure here is only warned
+	 * about and the initialization goes on.
+	 */
+	ret = pci_enable_pcie_error_reporting(pdev);
+	if (ret != 0)
+		dev_warn(&pdev->dev, "PCIe AER capability disabled\n");
+	else /* Cleanup uncorrectable error status before getting to init */
+		pci_cleanup_aer_uncorrect_error_status(pdev);
+
+	/* First enable the PCI device */
+	ret = pcim_enable_device(pdev);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Failed to enable PCIe device\n");
+		goto err_disable_aer;
+	}
+
+	/*
+	 * Enable the bus mastering, which effectively enables MSI IRQs and
+	 * Request TLPs translation
+	 */
+	pci_set_master(pdev);
+
+	/* Request all BARs resources and map BAR0 only (mask 1 is bit 0) */
+	ret = pcim_iomap_regions_request_all(pdev, 1, NTB_NAME);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Failed to request resources\n");
+		goto err_clear_master;
+	}
+
+	/* Retrieve virtual address of BAR0 - PCI configuration space */
+	ndev->cfgspc = pcim_iomap_table(pdev)[0];
+
+	/* Put the IDT driver data pointer to the PCI-device private pointer */
+	pci_set_drvdata(pdev, ndev);
+
+	dev_dbg(&pdev->dev, "NT-function PCIe interface initialized");
+
+	return 0;
+
+err_clear_master:
+	pci_clear_master(pdev);
+err_disable_aer:
+	(void)pci_disable_pcie_error_reporting(pdev);
+
+	return ret;
+}
+
+/*
+ * idt_deinit_pci() - undo the basic PCI-related initialization
+ * @ndev:	Pointer to the IDT PCIe-switch driver descriptor
+ *
+ * Only the non-managed settings are reverted here, the managed resources
+ * will be freed on the driver detachment.
+ */
+static void idt_deinit_pci(struct idt_ntb_dev *ndev)
+{
+	struct pci_dev *pdev = ndev->ntb.pdev;
+	struct device *dev = &pdev->dev;
+
+	/* The PCI-device private data pointer is not valid anymore */
+	pci_set_drvdata(pdev, NULL);
+
+	/* No bus mastering means no Request TLPs translation */
+	pci_clear_master(pdev);
+
+	/* Switch the AER capability off, the result is of no interest */
+	(void)pci_disable_pcie_error_reporting(pdev);
+
+	dev_dbg(dev, "NT-function PCIe interface cleared");
+}
+
+/*===========================================================================
+ *                       12. PCI bus callback functions
+ *===========================================================================
+ */
+
+/*
+ * idt_pci_probe() - PCI device probe callback
+ * @pdev:	Pointer to PCI device structure
+ * @id:		PCIe device custom descriptor
+ *
+ * The subsystems are brought up in the following order: pre-initialization
+ * check, descriptor allocation, basic PCI setup, ports scan, link events,
+ * memory windows, messaging, interrupts, NTB bus registration and DebugFS
+ * node. A DebugFS failure is not fatal.
+ *
+ * Return: zero on success, otherwise negative error number
+ */
+static int idt_pci_probe(struct pci_dev *pdev,
+			 const struct pci_device_id *id)
+{
+	struct idt_ntb_dev *ndev;
+	int ret;
+
+	/* Check whether IDT PCIe-switch is properly pre-initialized */
+	ret = idt_check_setup(pdev);
+	if (ret != 0)
+		return ret;
+
+	/*
+	 * Allocate the memory for IDT NTB device data. The helper returns
+	 * either a valid pointer or an ERR_PTR() value, never NULL, so
+	 * IS_ERR() is the proper check (PTR_ERR() of NULL would be zero and
+	 * would look like a successful probe).
+	 */
+	ndev = idt_create_dev(pdev, id);
+	if (IS_ERR(ndev))
+		return PTR_ERR(ndev);
+
+	/* Initialize the basic PCI subsystem of the device */
+	ret = idt_init_pci(ndev);
+	if (ret != 0)
+		return ret;
+
+	/* Scan ports of the IDT PCIe-switch */
+	(void)idt_scan_ports(ndev);
+
+	/* Initialize NTB link events subsystem */
+	idt_init_link(ndev);
+
+	/* Initialize MWs subsystem */
+	ret = idt_init_mws(ndev);
+	if (ret != 0)
+		goto err_deinit_link;
+
+	/* Initialize Messaging subsystem */
+	idt_init_msg(ndev);
+
+	/* Initialize IDT interrupts handler */
+	ret = idt_init_isr(ndev);
+	if (ret != 0)
+		goto err_deinit_link;
+
+	/* Register IDT NTB devices on the NTB bus */
+	ret = idt_register_device(ndev);
+	if (ret != 0)
+		goto err_deinit_isr;
+
+	/* Initialize DebugFS info node, the driver works without it too */
+	(void)idt_init_dbgfs(ndev);
+
+	/* IDT PCIe-switch NTB driver is finally initialized */
+	dev_info(&pdev->dev, "IDT NTB device is ready");
+
+	/* May the force be with us... */
+	return 0;
+
+err_deinit_isr:
+	idt_deinit_isr(ndev);
+err_deinit_link:
+	idt_deinit_link(ndev);
+	idt_deinit_pci(ndev);
+
+	return ret;
+}
+
+/*
+ * idt_pci_remove() - PCI device remove callback
+ * @pdev:	Pointer to PCI device structure
+ *
+ * The driver descriptor is taken from the PCI device private data and the
+ * subsystems are shut down: DebugFS node, NTB bus registration, interrupts,
+ * link events and finally the basic PCI setup.
+ */
+static void idt_pci_remove(struct pci_dev *pdev)
+{
+	struct idt_ntb_dev *ndev = pci_get_drvdata(pdev);
+
+	/* Deinit the DebugFS node */
+	idt_deinit_dbgfs(ndev);
+
+	/* Unregister NTB device */
+	idt_unregister_device(ndev);
+
+	/* Stop the interrupts handling */
+	idt_deinit_isr(ndev);
+
+	/* Deinitialize link event subsystem */
+	idt_deinit_link(ndev);
+
+	/* Deinit basic PCI subsystem */
+	idt_deinit_pci(ndev);
+
+	/* IDT PCIe-switch NTB driver is finally removed */
+	dev_info(&pdev->dev, "IDT NTB device is removed");
+
+	/* Sayonara... */
+}
+
+/*
+ * IDT PCIe-switch models ports configuration structures
+ *
+ * There is one descriptor per supported switch model. Each of them holds the
+ * model name, the number of ports and the list of the port numbers. The
+ * descriptors are referenced from the PCI-ids table and are saved in the
+ * swcfg field of the driver descriptor by idt_create_dev().
+ */
+static struct idt_89hpes_cfg idt_89hpes24nt6ag2_config = {
+	.name = "89HPES24NT6AG2",
+	.port_cnt = 6, .ports = {0, 2, 4, 6, 8, 12}
+};
+static struct idt_89hpes_cfg idt_89hpes32nt8ag2_config = {
+	.name = "89HPES32NT8AG2",
+	.port_cnt = 8, .ports = {0, 2, 4, 6, 8, 12, 16, 20}
+};
+static struct idt_89hpes_cfg idt_89hpes32nt8bg2_config = {
+	.name = "89HPES32NT8BG2",
+	.port_cnt = 8, .ports = {0, 2, 4, 6, 8, 12, 16, 20}
+};
+static struct idt_89hpes_cfg idt_89hpes12nt12g2_config = {
+	.name = "89HPES12NT12G2",
+	.port_cnt = 3, .ports = {0, 8, 16}
+};
+static struct idt_89hpes_cfg idt_89hpes16nt16g2_config = {
+	.name = "89HPES16NT16G2",
+	.port_cnt = 4, .ports = {0, 8, 12, 16}
+};
+static struct idt_89hpes_cfg idt_89hpes24nt24g2_config = {
+	.name = "89HPES24NT24G2",
+	.port_cnt = 8, .ports = {0, 2, 4, 6, 8, 12, 16, 20}
+};
+static struct idt_89hpes_cfg idt_89hpes32nt24ag2_config = {
+	.name = "89HPES32NT24AG2",
+	.port_cnt = 8, .ports = {0, 2, 4, 6, 8, 12, 16, 20}
+};
+static struct idt_89hpes_cfg idt_89hpes32nt24bg2_config = {
+	.name = "89HPES32NT24BG2",
+	.port_cnt = 8, .ports = {0, 2, 4, 6, 8, 12, 16, 20}
+};
+
+/*
+ * PCI-ids table of the supported IDT PCIe-switch devices
+ *
+ * Every entry binds a switch model to its ports configuration structure.
+ * The table is terminated by an all-zero entry and is exported with
+ * MODULE_DEVICE_TABLE().
+ */
+static const struct pci_device_id idt_pci_tbl[] = {
+	{IDT_PCI_DEVICE_IDS(89HPES24NT6AG2,  idt_89hpes24nt6ag2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES32NT8AG2,  idt_89hpes32nt8ag2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES32NT8BG2,  idt_89hpes32nt8bg2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES12NT12G2,  idt_89hpes12nt12g2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES16NT16G2,  idt_89hpes16nt16g2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES24NT24G2,  idt_89hpes24nt24g2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES32NT24AG2, idt_89hpes32nt24ag2_config)},
+	{IDT_PCI_DEVICE_IDS(89HPES32NT24BG2, idt_89hpes32nt24bg2_config)},
+	{0}
+};
+MODULE_DEVICE_TABLE(pci, idt_pci_tbl);
+
+/*
+ * IDT PCIe-switch NT-function device driver structure definition
+ *
+ * It ties the probe/remove callbacks and the PCI-ids table together under
+ * the module name. The structure is registered by idt_pci_driver_init() and
+ * unregistered by idt_pci_driver_exit().
+ */
+static struct pci_driver idt_pci_driver = {
+	.name		= KBUILD_MODNAME,
+	.probe		= idt_pci_probe,
+	.remove		= idt_pci_remove,
+	.id_table	= idt_pci_tbl,
+};
+
+/*
+ * idt_pci_driver_init() - module entry point
+ *
+ * Creates the top DebugFS directory (if DebugFS is initialized) and
+ * registers the PCI driver. If the registration fails the directory is
+ * removed again, since the module exit handler isn't called after a failed
+ * initialization.
+ *
+ * Return: zero on success, otherwise the error returned by
+ * pci_register_driver().
+ */
+static int __init idt_pci_driver_init(void)
+{
+	int ret;
+
+	pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+	/* Create the top DebugFS directory if the FS is initialized */
+	if (debugfs_initialized())
+		dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+	/* Register the NTB hardware driver to handle the PCI device */
+	ret = pci_register_driver(&idt_pci_driver);
+	if (ret != 0) {
+		/* Don't leave the directory behind on failure */
+		debugfs_remove_recursive(dbgfs_topdir);
+		dbgfs_topdir = NULL;
+	}
+
+	return ret;
+}
+module_init(idt_pci_driver_init);
+
+static void __exit idt_pci_driver_exit(void)
+{
+	/* Unregister the NTB hardware driver first */
+	pci_unregister_driver(&idt_pci_driver);
+
+	/* Then discard the top DebugFS directory with everything inside */
+	debugfs_remove_recursive(dbgfs_topdir);
+}
+module_exit(idt_pci_driver_exit);
+

diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.h b/drivers/ntb/hw/idt/ntb_hw_idt.h
new file mode 100644
index 0000000..856fd18
--- /dev/null
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.h

@@ -0,0 +1,1149 @@
+/*
+ *   This file is provided under a GPLv2 license.  When using or
+ *   redistributing this file, you may do so under that license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 T-Platforms All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms and conditions of the GNU General Public License,
+ *   version 2, as published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+ *   Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License along
+ *   with this program; if not, one can be found http://www.gnu.org/licenses/.
+ *
+ *   The full GNU General Public License is included in this distribution in
+ *   the file called "COPYING".
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * IDT PCIe-switch NTB Linux driver
+ *
+ * Contact Information:
+ * Serge Semin <fancer.lancer@gmail.com>, <Sergey.Semin@t-platforms.ru>
+ */
+
+#ifndef NTB_HW_IDT_H
+#define NTB_HW_IDT_H
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/ntb.h>
+
+
+/*
+ * Expands to the designated initializers of a struct pci_device_id entry
+ * matching one supported IDT PCIe-switch (any subsystem IDs, class
+ * PCI_CLASS_BRIDGE_OTHER)
+ * @devname: Capitalized name of the particular device, pasted onto
+ *	     PCI_DEVICE_ID_IDT_ to get the device ID
+ * @data: Variable whose address is stored in the driver_data field
+ */
+#define IDT_PCI_DEVICE_IDS(devname, data) \
+	.vendor = PCI_VENDOR_ID_IDT, \
+	.device = PCI_DEVICE_ID_IDT_##devname, \
+	.subvendor = PCI_ANY_ID, \
+	.subdevice = PCI_ANY_ID, \
+	.class = (PCI_CLASS_BRIDGE_OTHER << 8), \
+	.class_mask = (0xFFFF00), \
+	.driver_data = (kernel_ulong_t)&data
+
+/*
+ * IDT PCIe-switches device IDs
+ */
+#define PCI_DEVICE_ID_IDT_89HPES24NT6AG2  0x8091
+#define PCI_DEVICE_ID_IDT_89HPES32NT8AG2  0x808F
+#define PCI_DEVICE_ID_IDT_89HPES32NT8BG2  0x8088
+#define PCI_DEVICE_ID_IDT_89HPES12NT12G2  0x8092
+#define PCI_DEVICE_ID_IDT_89HPES16NT16G2  0x8090
+#define PCI_DEVICE_ID_IDT_89HPES24NT24G2  0x808E
+#define PCI_DEVICE_ID_IDT_89HPES32NT24AG2 0x808C
+#define PCI_DEVICE_ID_IDT_89HPES32NT24BG2 0x808A
+
+/*
+ * NT-function Configuration Space registers
+ * NOTE 1) The IDT PCIe-switch internal data is little-endian
+ *      so it must be taken into account in the driver
+ *      internals.
+ *      2) Additionally the registers should be accessed either
+ *      with byte-enables corresponding to their native size or
+ *      the size of one DWORD
+ *
+ * So to simplify the driver code, only DWORD-sized read/write
+ * operations are utilized.
+ */
+/* PCI Express Configuration Space */
+/* PCI Express command/status register	(DWORD) */
+#define IDT_NT_PCICMDSTS		0x00004U
+/* PCI Express Device Capabilities	(DWORD) */
+#define IDT_NT_PCIEDCAP			0x00044U
+/* PCI Express Device Control/Status	(WORD+WORD) */
+#define IDT_NT_PCIEDCTLSTS		0x00048U
+/* PCI Express Link Capabilities	(DWORD) */
+#define IDT_NT_PCIELCAP			0x0004CU
+/* PCI Express Link Control/Status	(WORD+WORD) */
+#define IDT_NT_PCIELCTLSTS		0x00050U
+/* PCI Express Device Capabilities 2	(DWORD) */
+#define IDT_NT_PCIEDCAP2		0x00064U
+/* PCI Express Device Control 2		(WORD+WORD) */
+#define IDT_NT_PCIEDCTL2		0x00068U
+/* PCI Power Management Control and Status (DWORD) */
+#define IDT_NT_PMCSR			0x000C4U
+/*==========================================*/
+/* IDT Proprietary NT-port-specific registers */
+/* NT-function main control registers */
+/* NT Endpoint Control			(DWORD) */
+#define IDT_NT_NTCTL			0x00400U
+/* NT Endpoint Interrupt Status/Mask	(DWORD) */
+#define IDT_NT_NTINTSTS			0x00404U
+#define IDT_NT_NTINTMSK			0x00408U
+/* NT Endpoint Signal Data		(DWORD) */
+#define IDT_NT_NTSDATA			0x0040CU
+/* NT Endpoint Global Signal		(DWORD) */
+#define IDT_NT_NTGSIGNAL		0x00410U
+/* Internal Error Reporting Mask 0/1	(DWORD) */
+#define IDT_NT_NTIERRORMSK0		0x00414U
+#define IDT_NT_NTIERRORMSK1		0x00418U
+/* Doorbell registers */
+/* NT Outbound Doorbell Set		(DWORD) */
+#define IDT_NT_OUTDBELLSET		0x00420U
+/* NT Inbound Doorbell Status/Mask	(DWORD) */
+#define IDT_NT_INDBELLSTS		0x00428U
+#define IDT_NT_INDBELLMSK		0x0042CU
+/* Message registers */
+/* Outbound Message N			(DWORD) */
+#define IDT_NT_OUTMSG0			0x00430U
+#define IDT_NT_OUTMSG1			0x00434U
+#define IDT_NT_OUTMSG2			0x00438U
+#define IDT_NT_OUTMSG3			0x0043CU
+/* Inbound Message N			(DWORD) */
+#define IDT_NT_INMSG0			0x00440U
+#define IDT_NT_INMSG1			0x00444U
+#define IDT_NT_INMSG2			0x00448U
+#define IDT_NT_INMSG3			0x0044CU
+/* Inbound Message Source N		(DWORD) */
+#define IDT_NT_INMSGSRC0		0x00450U
+#define IDT_NT_INMSGSRC1		0x00454U
+#define IDT_NT_INMSGSRC2		0x00458U
+#define IDT_NT_INMSGSRC3		0x0045CU
+/* Message Status			(DWORD) */
+#define IDT_NT_MSGSTS			0x00460U
+/* Message Status Mask			(DWORD) */
+#define IDT_NT_MSGSTSMSK		0x00464U
+/* BAR-setup registers */
+/* BAR N Setup/Limit Address/Lower and Upper Translated Base Address (DWORD) */
+#define IDT_NT_BARSETUP0		0x00470U
+#define IDT_NT_BARLIMIT0		0x00474U
+#define IDT_NT_BARLTBASE0		0x00478U
+#define IDT_NT_BARUTBASE0		0x0047CU
+#define IDT_NT_BARSETUP1		0x00480U
+#define IDT_NT_BARLIMIT1		0x00484U
+#define IDT_NT_BARLTBASE1		0x00488U
+#define IDT_NT_BARUTBASE1		0x0048CU
+#define IDT_NT_BARSETUP2		0x00490U
+#define IDT_NT_BARLIMIT2		0x00494U
+#define IDT_NT_BARLTBASE2		0x00498U
+#define IDT_NT_BARUTBASE2		0x0049CU
+#define IDT_NT_BARSETUP3		0x004A0U
+#define IDT_NT_BARLIMIT3		0x004A4U
+#define IDT_NT_BARLTBASE3		0x004A8U
+#define IDT_NT_BARUTBASE3		0x004ACU
+#define IDT_NT_BARSETUP4		0x004B0U
+#define IDT_NT_BARLIMIT4		0x004B4U
+#define IDT_NT_BARLTBASE4		0x004B8U
+#define IDT_NT_BARUTBASE4		0x004BCU
+#define IDT_NT_BARSETUP5		0x004C0U
+#define IDT_NT_BARLIMIT5		0x004C4U
+#define IDT_NT_BARLTBASE5		0x004C8U
+#define IDT_NT_BARUTBASE5		0x004CCU
+/* NT mapping table registers */
+/* NT Mapping Table Address/Status/Data	(DWORD) */
+#define IDT_NT_NTMTBLADDR		0x004D0U
+#define IDT_NT_NTMTBLSTS		0x004D4U
+#define IDT_NT_NTMTBLDATA		0x004D8U
+/* Requester ID (Bus:Device:Function) Capture	(DWORD) */
+#define IDT_NT_REQIDCAP			0x004DCU
+/* Memory Windows Lookup table registers */
+/* Lookup Table Offset/Lower, Middle and Upper data	(DWORD) */
+#define IDT_NT_LUTOFFSET		0x004E0U
+#define IDT_NT_LUTLDATA			0x004E4U
+#define IDT_NT_LUTMDATA			0x004E8U
+#define IDT_NT_LUTUDATA			0x004ECU
+/* NT Endpoint Uncorrectable/Correctable Errors Emulation registers (DWORD) */
+#define IDT_NT_NTUEEM			0x004F0U
+#define IDT_NT_NTCEEM			0x004F4U
+/* Global Address Space Access/Data registers	(DWORD) */
+#define IDT_NT_GASAADDR			0x00FF8U
+#define IDT_NT_GASADATA			0x00FFCU
+
+/*
+ * IDT PCIe-switch Global Configuration and Status registers
+ */
+/* Port N Configuration register in global space */
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP0_PCIECMDSTS		0x01004U
+#define IDT_SW_NTP0_PCIELCTLSTS		0x01050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP0_NTCTL		0x01400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP0_BARSETUP0		0x01470U
+#define IDT_SW_NTP0_BARLIMIT0		0x01474U
+#define IDT_SW_NTP0_BARLTBASE0		0x01478U
+#define IDT_SW_NTP0_BARUTBASE0		0x0147CU
+#define IDT_SW_NTP0_BARSETUP1		0x01480U
+#define IDT_SW_NTP0_BARLIMIT1		0x01484U
+#define IDT_SW_NTP0_BARLTBASE1		0x01488U
+#define IDT_SW_NTP0_BARUTBASE1		0x0148CU
+#define IDT_SW_NTP0_BARSETUP2		0x01490U
+#define IDT_SW_NTP0_BARLIMIT2		0x01494U
+#define IDT_SW_NTP0_BARLTBASE2		0x01498U
+#define IDT_SW_NTP0_BARUTBASE2		0x0149CU
+#define IDT_SW_NTP0_BARSETUP3		0x014A0U
+#define IDT_SW_NTP0_BARLIMIT3		0x014A4U
+#define IDT_SW_NTP0_BARLTBASE3		0x014A8U
+#define IDT_SW_NTP0_BARUTBASE3		0x014ACU
+#define IDT_SW_NTP0_BARSETUP4		0x014B0U
+#define IDT_SW_NTP0_BARLIMIT4		0x014B4U
+#define IDT_SW_NTP0_BARLTBASE4		0x014B8U
+#define IDT_SW_NTP0_BARUTBASE4		0x014BCU
+#define IDT_SW_NTP0_BARSETUP5		0x014C0U
+#define IDT_SW_NTP0_BARLIMIT5		0x014C4U
+#define IDT_SW_NTP0_BARLTBASE5		0x014C8U
+#define IDT_SW_NTP0_BARUTBASE5		0x014CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP2_PCIECMDSTS		0x05004U
+#define IDT_SW_NTP2_PCIELCTLSTS		0x05050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP2_NTCTL		0x05400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP2_BARSETUP0		0x05470U
+#define IDT_SW_NTP2_BARLIMIT0		0x05474U
+#define IDT_SW_NTP2_BARLTBASE0		0x05478U
+#define IDT_SW_NTP2_BARUTBASE0		0x0547CU
+#define IDT_SW_NTP2_BARSETUP1		0x05480U
+#define IDT_SW_NTP2_BARLIMIT1		0x05484U
+#define IDT_SW_NTP2_BARLTBASE1		0x05488U
+#define IDT_SW_NTP2_BARUTBASE1		0x0548CU
+#define IDT_SW_NTP2_BARSETUP2		0x05490U
+#define IDT_SW_NTP2_BARLIMIT2		0x05494U
+#define IDT_SW_NTP2_BARLTBASE2		0x05498U
+#define IDT_SW_NTP2_BARUTBASE2		0x0549CU
+#define IDT_SW_NTP2_BARSETUP3		0x054A0U
+#define IDT_SW_NTP2_BARLIMIT3		0x054A4U
+#define IDT_SW_NTP2_BARLTBASE3		0x054A8U
+#define IDT_SW_NTP2_BARUTBASE3		0x054ACU
+#define IDT_SW_NTP2_BARSETUP4		0x054B0U
+#define IDT_SW_NTP2_BARLIMIT4		0x054B4U
+#define IDT_SW_NTP2_BARLTBASE4		0x054B8U
+#define IDT_SW_NTP2_BARUTBASE4		0x054BCU
+#define IDT_SW_NTP2_BARSETUP5		0x054C0U
+#define IDT_SW_NTP2_BARLIMIT5		0x054C4U
+#define IDT_SW_NTP2_BARLTBASE5		0x054C8U
+#define IDT_SW_NTP2_BARUTBASE5		0x054CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP4_PCIECMDSTS		0x09004U
+#define IDT_SW_NTP4_PCIELCTLSTS		0x09050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP4_NTCTL		0x09400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP4_BARSETUP0		0x09470U
+#define IDT_SW_NTP4_BARLIMIT0		0x09474U
+#define IDT_SW_NTP4_BARLTBASE0		0x09478U
+#define IDT_SW_NTP4_BARUTBASE0		0x0947CU
+#define IDT_SW_NTP4_BARSETUP1		0x09480U
+#define IDT_SW_NTP4_BARLIMIT1		0x09484U
+#define IDT_SW_NTP4_BARLTBASE1		0x09488U
+#define IDT_SW_NTP4_BARUTBASE1		0x0948CU
+#define IDT_SW_NTP4_BARSETUP2		0x09490U
+#define IDT_SW_NTP4_BARLIMIT2		0x09494U
+#define IDT_SW_NTP4_BARLTBASE2		0x09498U
+#define IDT_SW_NTP4_BARUTBASE2		0x0949CU
+#define IDT_SW_NTP4_BARSETUP3		0x094A0U
+#define IDT_SW_NTP4_BARLIMIT3		0x094A4U
+#define IDT_SW_NTP4_BARLTBASE3		0x094A8U
+#define IDT_SW_NTP4_BARUTBASE3		0x094ACU
+#define IDT_SW_NTP4_BARSETUP4		0x094B0U
+#define IDT_SW_NTP4_BARLIMIT4		0x094B4U
+#define IDT_SW_NTP4_BARLTBASE4		0x094B8U
+#define IDT_SW_NTP4_BARUTBASE4		0x094BCU
+#define IDT_SW_NTP4_BARSETUP5		0x094C0U
+#define IDT_SW_NTP4_BARLIMIT5		0x094C4U
+#define IDT_SW_NTP4_BARLTBASE5		0x094C8U
+#define IDT_SW_NTP4_BARUTBASE5		0x094CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP6_PCIECMDSTS		0x0D004U
+#define IDT_SW_NTP6_PCIELCTLSTS		0x0D050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP6_NTCTL		0x0D400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP6_BARSETUP0		0x0D470U
+#define IDT_SW_NTP6_BARLIMIT0		0x0D474U
+#define IDT_SW_NTP6_BARLTBASE0		0x0D478U
+#define IDT_SW_NTP6_BARUTBASE0		0x0D47CU
+#define IDT_SW_NTP6_BARSETUP1		0x0D480U
+#define IDT_SW_NTP6_BARLIMIT1		0x0D484U
+#define IDT_SW_NTP6_BARLTBASE1		0x0D488U
+#define IDT_SW_NTP6_BARUTBASE1		0x0D48CU
+#define IDT_SW_NTP6_BARSETUP2		0x0D490U
+#define IDT_SW_NTP6_BARLIMIT2		0x0D494U
+#define IDT_SW_NTP6_BARLTBASE2		0x0D498U
+#define IDT_SW_NTP6_BARUTBASE2		0x0D49CU
+#define IDT_SW_NTP6_BARSETUP3		0x0D4A0U
+#define IDT_SW_NTP6_BARLIMIT3		0x0D4A4U
+#define IDT_SW_NTP6_BARLTBASE3		0x0D4A8U
+#define IDT_SW_NTP6_BARUTBASE3		0x0D4ACU
+#define IDT_SW_NTP6_BARSETUP4		0x0D4B0U
+#define IDT_SW_NTP6_BARLIMIT4		0x0D4B4U
+#define IDT_SW_NTP6_BARLTBASE4		0x0D4B8U
+#define IDT_SW_NTP6_BARUTBASE4		0x0D4BCU
+#define IDT_SW_NTP6_BARSETUP5		0x0D4C0U
+#define IDT_SW_NTP6_BARLIMIT5		0x0D4C4U
+#define IDT_SW_NTP6_BARLTBASE5		0x0D4C8U
+#define IDT_SW_NTP6_BARUTBASE5		0x0D4CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP8_PCIECMDSTS		0x11004U
+#define IDT_SW_NTP8_PCIELCTLSTS		0x11050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP8_NTCTL		0x11400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP8_BARSETUP0		0x11470U
+#define IDT_SW_NTP8_BARLIMIT0		0x11474U
+#define IDT_SW_NTP8_BARLTBASE0		0x11478U
+#define IDT_SW_NTP8_BARUTBASE0		0x1147CU
+#define IDT_SW_NTP8_BARSETUP1		0x11480U
+#define IDT_SW_NTP8_BARLIMIT1		0x11484U
+#define IDT_SW_NTP8_BARLTBASE1		0x11488U
+#define IDT_SW_NTP8_BARUTBASE1		0x1148CU
+#define IDT_SW_NTP8_BARSETUP2		0x11490U
+#define IDT_SW_NTP8_BARLIMIT2		0x11494U
+#define IDT_SW_NTP8_BARLTBASE2		0x11498U
+#define IDT_SW_NTP8_BARUTBASE2		0x1149CU
+#define IDT_SW_NTP8_BARSETUP3		0x114A0U
+#define IDT_SW_NTP8_BARLIMIT3		0x114A4U
+#define IDT_SW_NTP8_BARLTBASE3		0x114A8U
+#define IDT_SW_NTP8_BARUTBASE3		0x114ACU
+#define IDT_SW_NTP8_BARSETUP4		0x114B0U
+#define IDT_SW_NTP8_BARLIMIT4		0x114B4U
+#define IDT_SW_NTP8_BARLTBASE4		0x114B8U
+#define IDT_SW_NTP8_BARUTBASE4		0x114BCU
+#define IDT_SW_NTP8_BARSETUP5		0x114C0U
+#define IDT_SW_NTP8_BARLIMIT5		0x114C4U
+#define IDT_SW_NTP8_BARLTBASE5		0x114C8U
+#define IDT_SW_NTP8_BARUTBASE5		0x114CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP12_PCIECMDSTS		0x19004U
+#define IDT_SW_NTP12_PCIELCTLSTS	0x19050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP12_NTCTL		0x19400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP12_BARSETUP0		0x19470U
+#define IDT_SW_NTP12_BARLIMIT0		0x19474U
+#define IDT_SW_NTP12_BARLTBASE0		0x19478U
+#define IDT_SW_NTP12_BARUTBASE0		0x1947CU
+#define IDT_SW_NTP12_BARSETUP1		0x19480U
+#define IDT_SW_NTP12_BARLIMIT1		0x19484U
+#define IDT_SW_NTP12_BARLTBASE1		0x19488U
+#define IDT_SW_NTP12_BARUTBASE1		0x1948CU
+#define IDT_SW_NTP12_BARSETUP2		0x19490U
+#define IDT_SW_NTP12_BARLIMIT2		0x19494U
+#define IDT_SW_NTP12_BARLTBASE2		0x19498U
+#define IDT_SW_NTP12_BARUTBASE2		0x1949CU
+#define IDT_SW_NTP12_BARSETUP3		0x194A0U
+#define IDT_SW_NTP12_BARLIMIT3		0x194A4U
+#define IDT_SW_NTP12_BARLTBASE3		0x194A8U
+#define IDT_SW_NTP12_BARUTBASE3		0x194ACU
+#define IDT_SW_NTP12_BARSETUP4		0x194B0U
+#define IDT_SW_NTP12_BARLIMIT4		0x194B4U
+#define IDT_SW_NTP12_BARLTBASE4		0x194B8U
+#define IDT_SW_NTP12_BARUTBASE4		0x194BCU
+#define IDT_SW_NTP12_BARSETUP5		0x194C0U
+#define IDT_SW_NTP12_BARLIMIT5		0x194C4U
+#define IDT_SW_NTP12_BARLTBASE5		0x194C8U
+#define IDT_SW_NTP12_BARUTBASE5		0x194CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP16_PCIECMDSTS		0x21004U
+#define IDT_SW_NTP16_PCIELCTLSTS	0x21050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP16_NTCTL		0x21400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP16_BARSETUP0		0x21470U
+#define IDT_SW_NTP16_BARLIMIT0		0x21474U
+#define IDT_SW_NTP16_BARLTBASE0		0x21478U
+#define IDT_SW_NTP16_BARUTBASE0		0x2147CU
+#define IDT_SW_NTP16_BARSETUP1		0x21480U
+#define IDT_SW_NTP16_BARLIMIT1		0x21484U
+#define IDT_SW_NTP16_BARLTBASE1		0x21488U
+#define IDT_SW_NTP16_BARUTBASE1		0x2148CU
+#define IDT_SW_NTP16_BARSETUP2		0x21490U
+#define IDT_SW_NTP16_BARLIMIT2		0x21494U
+#define IDT_SW_NTP16_BARLTBASE2		0x21498U
+#define IDT_SW_NTP16_BARUTBASE2		0x2149CU
+#define IDT_SW_NTP16_BARSETUP3		0x214A0U
+#define IDT_SW_NTP16_BARLIMIT3		0x214A4U
+#define IDT_SW_NTP16_BARLTBASE3		0x214A8U
+#define IDT_SW_NTP16_BARUTBASE3		0x214ACU
+#define IDT_SW_NTP16_BARSETUP4		0x214B0U
+#define IDT_SW_NTP16_BARLIMIT4		0x214B4U
+#define IDT_SW_NTP16_BARLTBASE4		0x214B8U
+#define IDT_SW_NTP16_BARUTBASE4		0x214BCU
+#define IDT_SW_NTP16_BARSETUP5		0x214C0U
+#define IDT_SW_NTP16_BARLIMIT5		0x214C4U
+#define IDT_SW_NTP16_BARLTBASE5		0x214C8U
+#define IDT_SW_NTP16_BARUTBASE5		0x214CCU
+/* PCI Express command/status and link control/status registers (WORD+WORD) */
+#define IDT_SW_NTP20_PCIECMDSTS		0x29004U
+#define IDT_SW_NTP20_PCIELCTLSTS	0x29050U
+/* NT-function control register		(DWORD) */
+#define IDT_SW_NTP20_NTCTL		0x29400U
+/* BAR setup/limit/base address registers (DWORD) */
+#define IDT_SW_NTP20_BARSETUP0		0x29470U
+#define IDT_SW_NTP20_BARLIMIT0		0x29474U
+#define IDT_SW_NTP20_BARLTBASE0		0x29478U
+#define IDT_SW_NTP20_BARUTBASE0		0x2947CU
+#define IDT_SW_NTP20_BARSETUP1		0x29480U
+#define IDT_SW_NTP20_BARLIMIT1		0x29484U
+#define IDT_SW_NTP20_BARLTBASE1		0x29488U
+#define IDT_SW_NTP20_BARUTBASE1		0x2948CU
+#define IDT_SW_NTP20_BARSETUP2		0x29490U
+#define IDT_SW_NTP20_BARLIMIT2		0x29494U
+#define IDT_SW_NTP20_BARLTBASE2		0x29498U
+#define IDT_SW_NTP20_BARUTBASE2		0x2949CU
+#define IDT_SW_NTP20_BARSETUP3		0x294A0U
+#define IDT_SW_NTP20_BARLIMIT3		0x294A4U
+#define IDT_SW_NTP20_BARLTBASE3		0x294A8U
+#define IDT_SW_NTP20_BARUTBASE3		0x294ACU
+#define IDT_SW_NTP20_BARSETUP4		0x294B0U
+#define IDT_SW_NTP20_BARLIMIT4		0x294B4U
+#define IDT_SW_NTP20_BARLTBASE4		0x294B8U
+#define IDT_SW_NTP20_BARUTBASE4		0x294BCU
+#define IDT_SW_NTP20_BARSETUP5		0x294C0U
+#define IDT_SW_NTP20_BARLIMIT5		0x294C4U
+#define IDT_SW_NTP20_BARLTBASE5		0x294C8U
+#define IDT_SW_NTP20_BARUTBASE5		0x294CCU
+/* IDT PCIe-switch control register	(DWORD) */
+#define IDT_SW_CTL			0x3E000U
+/* Boot Configuration Vector Status	(DWORD) */
+#define IDT_SW_BCVSTS			0x3E004U
+/* Port Clocking Mode			(DWORD) */
+#define IDT_SW_PCLKMODE			0x3E008U
+/* Reset Drain Delay			(DWORD) */
+#define IDT_SW_RDRAINDELAY		0x3E080U
+/* Port Operating Mode Change Drain Delay (DWORD) */
+#define IDT_SW_POMCDELAY		0x3E084U
+/* Side Effect Delay			(DWORD) */
+#define IDT_SW_SEDELAY			0x3E088U
+/* Upstream Secondary Bus Reset Delay	(DWORD) */
+#define IDT_SW_SSBRDELAY		0x3E08CU
+/* Switch partition N Control/Status/Failover registers */
+#define IDT_SW_SWPART0CTL		0x3E100U
+#define IDT_SW_SWPART0STS		0x3E104U
+#define IDT_SW_SWPART0FCTL		0x3E108U
+#define IDT_SW_SWPART1CTL		0x3E120U
+#define IDT_SW_SWPART1STS		0x3E124U
+#define IDT_SW_SWPART1FCTL		0x3E128U
+#define IDT_SW_SWPART2CTL		0x3E140U
+#define IDT_SW_SWPART2STS		0x3E144U
+#define IDT_SW_SWPART2FCTL		0x3E148U
+#define IDT_SW_SWPART3CTL		0x3E160U
+#define IDT_SW_SWPART3STS		0x3E164U
+#define IDT_SW_SWPART3FCTL		0x3E168U
+#define IDT_SW_SWPART4CTL		0x3E180U
+#define IDT_SW_SWPART4STS		0x3E184U
+#define IDT_SW_SWPART4FCTL		0x3E188U
+#define IDT_SW_SWPART5CTL		0x3E1A0U
+#define IDT_SW_SWPART5STS		0x3E1A4U
+#define IDT_SW_SWPART5FCTL		0x3E1A8U
+#define IDT_SW_SWPART6CTL		0x3E1C0U
+#define IDT_SW_SWPART6STS		0x3E1C4U
+#define IDT_SW_SWPART6FCTL		0x3E1C8U
+#define IDT_SW_SWPART7CTL		0x3E1E0U
+#define IDT_SW_SWPART7STS		0x3E1E4U
+#define IDT_SW_SWPART7FCTL		0x3E1E8U
+/* Switch port N control and status registers */
+#define IDT_SW_SWPORT0CTL		0x3E200U
+#define IDT_SW_SWPORT0STS		0x3E204U
+#define IDT_SW_SWPORT0FCTL		0x3E208U
+#define IDT_SW_SWPORT2CTL		0x3E240U
+#define IDT_SW_SWPORT2STS		0x3E244U
+#define IDT_SW_SWPORT2FCTL		0x3E248U
+#define IDT_SW_SWPORT4CTL		0x3E280U
+#define IDT_SW_SWPORT4STS		0x3E284U
+#define IDT_SW_SWPORT4FCTL		0x3E288U
+#define IDT_SW_SWPORT6CTL		0x3E2C0U
+#define IDT_SW_SWPORT6STS		0x3E2C4U
+#define IDT_SW_SWPORT6FCTL		0x3E2C8U
+#define IDT_SW_SWPORT8CTL		0x3E300U
+#define IDT_SW_SWPORT8STS		0x3E304U
+#define IDT_SW_SWPORT8FCTL		0x3E308U
+#define IDT_SW_SWPORT12CTL		0x3E380U
+#define IDT_SW_SWPORT12STS		0x3E384U
+#define IDT_SW_SWPORT12FCTL		0x3E388U
+#define IDT_SW_SWPORT16CTL		0x3E400U
+#define IDT_SW_SWPORT16STS		0x3E404U
+#define IDT_SW_SWPORT16FCTL		0x3E408U
+#define IDT_SW_SWPORT20CTL		0x3E480U
+#define IDT_SW_SWPORT20STS		0x3E484U
+#define IDT_SW_SWPORT20FCTL		0x3E488U
+/* Switch Event registers */
+/* Switch Event Status/Mask/Partition mask (DWORD) */
+#define IDT_SW_SESTS			0x3EC00U
+#define IDT_SW_SEMSK			0x3EC04U
+#define IDT_SW_SEPMSK			0x3EC08U
+/* Switch Event Link Up/Down Status/Mask (DWORD) */
+#define IDT_SW_SELINKUPSTS		0x3EC0CU
+#define IDT_SW_SELINKUPMSK		0x3EC10U
+#define IDT_SW_SELINKDNSTS		0x3EC14U
+#define IDT_SW_SELINKDNMSK		0x3EC18U
+/* Switch Event Fundamental Reset Status/Mask (DWORD) */
+#define IDT_SW_SEFRSTSTS		0x3EC1CU
+#define IDT_SW_SEFRSTMSK		0x3EC20U
+/* Switch Event Hot Reset Status/Mask	(DWORD) */
+#define IDT_SW_SEHRSTSTS		0x3EC24U
+#define IDT_SW_SEHRSTMSK		0x3EC28U
+/* Switch Event Failover Mask		(DWORD) */
+#define IDT_SW_SEFOVRMSK		0x3EC2CU
+/* Switch Event Global Signal Status/Mask (DWORD) */
+#define IDT_SW_SEGSIGSTS		0x3EC30U
+#define IDT_SW_SEGSIGMSK		0x3EC34U
+/* NT Global Doorbell Status		(DWORD) */
+#define IDT_SW_GDBELLSTS		0x3EC3CU
+/* Switch partition N message M control (msgs routing table) (DWORD) */
+#define IDT_SW_SWP0MSGCTL0		0x3EE00U
+#define IDT_SW_SWP1MSGCTL0		0x3EE04U
+#define IDT_SW_SWP2MSGCTL0		0x3EE08U
+#define IDT_SW_SWP3MSGCTL0		0x3EE0CU
+#define IDT_SW_SWP4MSGCTL0		0x3EE10U
+#define IDT_SW_SWP5MSGCTL0		0x3EE14U
+#define IDT_SW_SWP6MSGCTL0		0x3EE18U
+#define IDT_SW_SWP7MSGCTL0		0x3EE1CU
+#define IDT_SW_SWP0MSGCTL1		0x3EE20U
+#define IDT_SW_SWP1MSGCTL1		0x3EE24U
+#define IDT_SW_SWP2MSGCTL1		0x3EE28U
+#define IDT_SW_SWP3MSGCTL1		0x3EE2CU
+#define IDT_SW_SWP4MSGCTL1		0x3EE30U
+#define IDT_SW_SWP5MSGCTL1		0x3EE34U
+#define IDT_SW_SWP6MSGCTL1		0x3EE38U
+#define IDT_SW_SWP7MSGCTL1		0x3EE3CU
+#define IDT_SW_SWP0MSGCTL2		0x3EE40U
+#define IDT_SW_SWP1MSGCTL2		0x3EE44U
+#define IDT_SW_SWP2MSGCTL2		0x3EE48U
+#define IDT_SW_SWP3MSGCTL2		0x3EE4CU
+#define IDT_SW_SWP4MSGCTL2		0x3EE50U
+#define IDT_SW_SWP5MSGCTL2		0x3EE54U
+#define IDT_SW_SWP6MSGCTL2		0x3EE58U
+#define IDT_SW_SWP7MSGCTL2		0x3EE5CU
+#define IDT_SW_SWP0MSGCTL3		0x3EE60U
+#define IDT_SW_SWP1MSGCTL3		0x3EE64U
+#define IDT_SW_SWP2MSGCTL3		0x3EE68U
+#define IDT_SW_SWP3MSGCTL3		0x3EE6CU
+#define IDT_SW_SWP4MSGCTL3		0x3EE70U
+#define IDT_SW_SWP5MSGCTL3		0x3EE74U
+#define IDT_SW_SWP6MSGCTL3		0x3EE78U
+#define IDT_SW_SWP7MSGCTL3		0x3EE7CU
+/* SMBus Status and Control registers	(DWORD) */
+#define IDT_SW_SMBUSSTS			0x3F188U
+#define IDT_SW_SMBUSCTL			0x3F18CU
+/* Serial EEPROM Interface		(DWORD) */
+#define IDT_SW_EEPROMINTF		0x3F190U
+/* SMBus I/O Expander Address N		(DWORD) */
+#define IDT_SW_IOEXPADDR0		0x3F198U
+#define IDT_SW_IOEXPADDR1		0x3F19CU
+#define IDT_SW_IOEXPADDR2		0x3F1A0U
+#define IDT_SW_IOEXPADDR3		0x3F1A4U
+#define IDT_SW_IOEXPADDR4		0x3F1A8U
+#define IDT_SW_IOEXPADDR5		0x3F1ACU
+/* General Purpose Events Control and Status registers (DWORD) */
+#define IDT_SW_GPECTL			0x3F1B0U
+#define IDT_SW_GPESTS			0x3F1B4U
+/* Temperature sensor Control/Status/Alarm/Adjustment/Slope registers */
+#define IDT_SW_TMPCTL			0x3F1D4U
+#define IDT_SW_TMPSTS			0x3F1D8U
+#define IDT_SW_TMPALARM			0x3F1DCU
+#define IDT_SW_TMPADJ			0x3F1E0U
+#define IDT_SW_TSSLOPE			0x3F1E4U
+/* SMBus Configuration Block header log	(DWORD) */
+#define IDT_SW_SMBUSCBHL		0x3F1E8U
+
+/*
+ * Common registers related constants
+ * @IDT_REG_ALIGN:	Registers alignment used in the driver
+ * @IDT_REG_PCI_MAX:	Maximum PCI configuration space register value
+ * @IDT_REG_SW_MAX:	Maximum global register value
+ */
+#define IDT_REG_ALIGN			4
+#define IDT_REG_PCI_MAX			0x00FFFU
+#define IDT_REG_SW_MAX			0x3FFFFU
+
+/*
+ * PCICMDSTS register fields related constants
+ * @IDT_PCICMDSTS_IOAE:	I/O access enable
+ * @IDT_PCICMDSTS_MAE:	Memory access enable
+ * @IDT_PCICMDSTS_BME:	Bus master enable
+ */
+#define IDT_PCICMDSTS_IOAE		0x00000001U
+#define IDT_PCICMDSTS_MAE		0x00000002U
+#define IDT_PCICMDSTS_BME		0x00000004U
+
+/*
+ * PCIEDCAP register fields related constants
+ * @IDT_PCIEDCAP_MPAYLOAD_MASK:	 Maximum payload size mask
+ * @IDT_PCIEDCAP_MPAYLOAD_FLD:	 Maximum payload size field offset
+ * @IDT_PCIEDCAP_MPAYLOAD_S128:	 Max supported payload size of 128 bytes
+ * @IDT_PCIEDCAP_MPAYLOAD_S256:	 Max supported payload size of 256 bytes
+ * @IDT_PCIEDCAP_MPAYLOAD_S512:	 Max supported payload size of 512 bytes
+ * @IDT_PCIEDCAP_MPAYLOAD_S1024: Max supported payload size of 1024 bytes
+ * @IDT_PCIEDCAP_MPAYLOAD_S2048: Max supported payload size of 2048 bytes
+ */
+#define IDT_PCIEDCAP_MPAYLOAD_MASK	0x00000007U
+#define IDT_PCIEDCAP_MPAYLOAD_FLD	0
+#define IDT_PCIEDCAP_MPAYLOAD_S128	0x00000000U
+#define IDT_PCIEDCAP_MPAYLOAD_S256	0x00000001U
+#define IDT_PCIEDCAP_MPAYLOAD_S512	0x00000002U
+#define IDT_PCIEDCAP_MPAYLOAD_S1024	0x00000003U
+#define IDT_PCIEDCAP_MPAYLOAD_S2048	0x00000004U
+
+/*
+ * PCIEDCTLSTS registers fields related constants
+ * @IDT_PCIEDCTLSTS_MPS_MASK:	Maximum payload size mask
+ * @IDT_PCIEDCTLSTS_MPS_FLD:	MPS field offset
+ * @IDT_PCIEDCTLSTS_MPS_S128:	Max payload size of 128 bytes
+ * @IDT_PCIEDCTLSTS_MPS_S256:	Max payload size of 256 bytes
+ * @IDT_PCIEDCTLSTS_MPS_S512:	Max payload size of 512 bytes
+ * @IDT_PCIEDCTLSTS_MPS_S1024:	Max payload size of 1024 bytes
+ * @IDT_PCIEDCTLSTS_MPS_S2048:	Max payload size of 2048 bytes
+ * @IDT_PCIEDCTLSTS_MPS_S4096:	Max payload size of 4096 bytes
+ */
+#define IDT_PCIEDCTLSTS_MPS_MASK	0x000000E0U
+#define IDT_PCIEDCTLSTS_MPS_FLD		5
+#define IDT_PCIEDCTLSTS_MPS_S128	0x00000000U
+#define IDT_PCIEDCTLSTS_MPS_S256	0x00000020U
+#define IDT_PCIEDCTLSTS_MPS_S512	0x00000040U
+#define IDT_PCIEDCTLSTS_MPS_S1024	0x00000060U
+#define IDT_PCIEDCTLSTS_MPS_S2048	0x00000080U
+#define IDT_PCIEDCTLSTS_MPS_S4096	0x000000A0U
+
+/*
+ * PCIELCAP register fields related constants
+ * @IDT_PCIELCAP_PORTNUM_MASK:	Port number field mask
+ * @IDT_PCIELCAP_PORTNUM_FLD:	Port number field offset
+ */
+#define IDT_PCIELCAP_PORTNUM_MASK	0xFF000000U
+#define IDT_PCIELCAP_PORTNUM_FLD	24
+
+/*
+ * PCIELCTLSTS registers fields related constants
+ * @IDT_PCIELCTLSTS_CLS_MASK:	Current link speed mask
+ * @IDT_PCIELCTLSTS_CLS_FLD:	Current link speed field offset
+ * @IDT_PCIELCTLSTS_NLW_MASK:	Negotiated link width mask
+ * @IDT_PCIELCTLSTS_NLW_FLD:	Negotiated link width field offset
+ * @IDT_PCIELCTLSTS_SCLK_COM:	Common slot clock configuration
+ */
+#define IDT_PCIELCTLSTS_CLS_MASK	0x000F0000U
+#define IDT_PCIELCTLSTS_CLS_FLD		16
+#define IDT_PCIELCTLSTS_NLW_MASK	0x03F00000U
+#define IDT_PCIELCTLSTS_NLW_FLD		20
+#define IDT_PCIELCTLSTS_SCLK_COM	0x10000000U
+
+/*
+ * NTCTL register fields related constants
+ * @IDT_NTCTL_IDPROTDIS:	ID Protection check disable (disable MTBL)
+ * @IDT_NTCTL_CPEN:		Completion enable
+ * @IDT_NTCTL_RNS:		Request no snoop processing (if MTBL disabled)
+ * @IDT_NTCTL_ATP:		Address type processing (if MTBL disabled)
+ */
+#define IDT_NTCTL_IDPROTDIS		0x00000001U
+#define IDT_NTCTL_CPEN			0x00000002U
+#define IDT_NTCTL_RNS			0x00000004U
+#define IDT_NTCTL_ATP			0x00000008U
+
+/*
+ * NTINTSTS register fields related constants
+ * @IDT_NTINTSTS_MSG:		Message interrupt bit
+ * @IDT_NTINTSTS_DBELL:		Doorbell interrupt bit
+ * @IDT_NTINTSTS_SEVENT:	Switch Event interrupt bit
+ * @IDT_NTINTSTS_TMPSENSOR:	Temperature sensor interrupt bit
+ */
+#define IDT_NTINTSTS_MSG		0x00000001U
+#define IDT_NTINTSTS_DBELL		0x00000002U
+#define IDT_NTINTSTS_SEVENT		0x00000008U
+#define IDT_NTINTSTS_TMPSENSOR		0x00000080U
+
+/*
+ * NTINTMSK register fields related constants
+ * @IDT_NTINTMSK_MSG:		Message interrupt mask bit
+ * @IDT_NTINTMSK_DBELL:		Doorbell interrupt mask bit
+ * @IDT_NTINTMSK_SEVENT:	Switch Event interrupt mask bit
+ * @IDT_NTINTMSK_TMPSENSOR:	Temperature sensor interrupt mask bit
+ * @IDT_NTINTMSK_ALL:		All the useful interrupts mask
+ */
+#define IDT_NTINTMSK_MSG		0x00000001U
+#define IDT_NTINTMSK_DBELL		0x00000002U
+#define IDT_NTINTMSK_SEVENT		0x00000008U
+#define IDT_NTINTMSK_TMPSENSOR		0x00000080U
+#define IDT_NTINTMSK_ALL \
+	(IDT_NTINTMSK_MSG | IDT_NTINTMSK_DBELL | \
+	 IDT_NTINTMSK_SEVENT | IDT_NTINTMSK_TMPSENSOR)
+
+/*
+ * NTGSIGNAL register fields related constants
+ * @IDT_NTGSIGNAL_SET:	Set global signal of the local partition
+ */
+#define IDT_NTGSIGNAL_SET		0x00000001U
+
+/*
+ * BARSETUP register fields related constants
+ * @IDT_BARSETUP_TYPE_MASK:	Mask of the TYPE field
+ * @IDT_BARSETUP_TYPE_32:	32-bit addressing BAR
+ * @IDT_BARSETUP_TYPE_64:	64-bit addressing BAR
+ * @IDT_BARSETUP_PREF:		Value of the BAR prefetchable field
+ * @IDT_BARSETUP_SIZE_MASK:	Mask of the SIZE field
+ * @IDT_BARSETUP_SIZE_FLD:	SIZE field offset
+ * @IDT_BARSETUP_SIZE_CFG:	SIZE field value in case of config space MODE
+ * @IDT_BARSETUP_MODE_CFG:	Configuration space BAR mode
+ * @IDT_BARSETUP_ATRAN_MASK:	ATRAN field mask
+ * @IDT_BARSETUP_ATRAN_FLD:	ATRAN field offset
+ * @IDT_BARSETUP_ATRAN_DIR:	Direct address translation memory window
+ * @IDT_BARSETUP_ATRAN_LUT12:	12-entry lookup table
+ * @IDT_BARSETUP_ATRAN_LUT24:	24-entry lookup table
+ * @IDT_BARSETUP_TPART_MASK:	TPART field mask
+ * @IDT_BARSETUP_TPART_FLD:	TPART field offset
+ * @IDT_BARSETUP_EN:		BAR enable bit
+ */
+#define IDT_BARSETUP_TYPE_MASK		0x00000006U
+#define IDT_BARSETUP_TYPE_FLD		0
+#define IDT_BARSETUP_TYPE_32		0x00000000U
+#define IDT_BARSETUP_TYPE_64		0x00000004U
+#define IDT_BARSETUP_PREF		0x00000008U
+#define IDT_BARSETUP_SIZE_MASK		0x000003F0U
+#define IDT_BARSETUP_SIZE_FLD		4
+#define IDT_BARSETUP_SIZE_CFG		0x000000C0U
+#define IDT_BARSETUP_MODE_CFG		0x00000400U
+#define IDT_BARSETUP_ATRAN_MASK		0x00001800U
+#define IDT_BARSETUP_ATRAN_FLD		11
+#define IDT_BARSETUP_ATRAN_DIR		0x00000000U
+#define IDT_BARSETUP_ATRAN_LUT12	0x00000800U
+#define IDT_BARSETUP_ATRAN_LUT24	0x00001000U
+#define IDT_BARSETUP_TPART_MASK		0x0000E000U
+#define IDT_BARSETUP_TPART_FLD		13
+#define IDT_BARSETUP_EN			0x80000000U
+
+/*
+ * NTMTBLDATA register fields related constants
+ * @IDT_NTMTBLDATA_VALID:	Set the MTBL entry being valid
+ * @IDT_NTMTBLDATA_REQID_MASK:	Bus:Device:Function field mask
+ * @IDT_NTMTBLDATA_REQID_FLD:	Bus:Device:Function field offset
+ * @IDT_NTMTBLDATA_PART_MASK:	Partition field mask
+ * @IDT_NTMTBLDATA_PART_FLD:	Partition field offset
+ * @IDT_NTMTBLDATA_ATP_TRANS:	Enable AT field translation on request TLPs
+ * @IDT_NTMTBLDATA_CNS_INV:	Enable No Snoop attribute inversion of
+ *				Completion TLPs
+ * @IDT_NTMTBLDATA_RNS_INV:	Enable No Snoop attribute inversion of
+ *				Request TLPs
+ */
+#define IDT_NTMTBLDATA_VALID		0x00000001U
+#define IDT_NTMTBLDATA_REQID_MASK	0x0001FFFEU
+#define IDT_NTMTBLDATA_REQID_FLD	1
+#define IDT_NTMTBLDATA_PART_MASK	0x000E0000U
+#define IDT_NTMTBLDATA_PART_FLD		17
+#define IDT_NTMTBLDATA_ATP_TRANS	0x20000000U
+#define IDT_NTMTBLDATA_CNS_INV		0x40000000U
+#define IDT_NTMTBLDATA_RNS_INV		0x80000000U
+
+/*
+ * REQIDCAP register fields related constants
+ * @IDT_REQIDCAP_REQID_MASK:	Request ID field mask
+ * @IDT_REQIDCAP_REQID_FLD:	Request ID field offset
+ */
+#define IDT_REQIDCAP_REQID_MASK		0x0000FFFFU
+#define IDT_REQIDCAP_REQID_FLD		0
+
+/*
+ * LUTOFFSET register fields related constants
+ * @IDT_LUTOFFSET_INDEX_MASK:	Lookup table index field mask
+ * @IDT_LUTOFFSET_INDEX_FLD:	Lookup table index field offset
+ * @IDT_LUTOFFSET_BAR_MASK:	Lookup table BAR select field mask
+ * @IDT_LUTOFFSET_BAR_FLD:	Lookup table BAR select field offset
+ */
+#define IDT_LUTOFFSET_INDEX_MASK	0x0000001FU
+#define IDT_LUTOFFSET_INDEX_FLD		0
+#define IDT_LUTOFFSET_BAR_MASK		0x00000700U
+#define IDT_LUTOFFSET_BAR_FLD		8
+
+/*
+ * LUTUDATA register fields related constants
+ * @IDT_LUTUDATA_PART_MASK:	Partition field mask
+ * @IDT_LUTUDATA_PART_FLD:	Partition field offset
+ * @IDT_LUTUDATA_VALID:		Lookup table entry valid bit
+ */
+#define IDT_LUTUDATA_PART_MASK		0x0000000FU
+#define IDT_LUTUDATA_PART_FLD		0
+#define IDT_LUTUDATA_VALID		0x80000000U
+
+/*
+ * SWPARTxSTS register fields related constants
+ * @IDT_SWPARTxSTS_SCI:		Switch partition state change initiated
+ * @IDT_SWPARTxSTS_SCC:		Switch partition state change completed
+ * @IDT_SWPARTxSTS_STATE_MASK:	Switch partition state mask
+ * @IDT_SWPARTxSTS_STATE_FLD:	Switch partition state field offset
+ * @IDT_SWPARTxSTS_STATE_DIS:	Switch partition disabled
+ * @IDT_SWPARTxSTS_STATE_ACT:	Switch partition enabled
+ * @IDT_SWPARTxSTS_STATE_RES:	Switch partition in reset
+ * @IDT_SWPARTxSTS_US:		Switch partition has upstream port
+ * @IDT_SWPARTxSTS_USID_MASK:	Switch partition upstream port ID mask
+ * @IDT_SWPARTxSTS_USID_FLD:	Switch partition upstream port ID field offset
+ * @IDT_SWPARTxSTS_NT:		Upstream port has NT function
+ * @IDT_SWPARTxSTS_DMA:		Upstream port has DMA function
+ */
+#define IDT_SWPARTxSTS_SCI		0x00000001U
+#define IDT_SWPARTxSTS_SCC		0x00000002U
+#define IDT_SWPARTxSTS_STATE_MASK	0x00000060U
+#define IDT_SWPARTxSTS_STATE_FLD	5
+#define IDT_SWPARTxSTS_STATE_DIS	0x00000000U
+#define IDT_SWPARTxSTS_STATE_ACT	0x00000020U
+#define IDT_SWPARTxSTS_STATE_RES	0x00000060U
+#define IDT_SWPARTxSTS_US		0x00000100U
+#define IDT_SWPARTxSTS_USID_MASK	0x00003E00U
+#define IDT_SWPARTxSTS_USID_FLD		9
+#define IDT_SWPARTxSTS_NT		0x00004000U
+#define IDT_SWPARTxSTS_DMA		0x00008000U
+
+/*
+ * SWPORTxSTS register fields related constants
+ * @IDT_SWPORTxSTS_OMCI:	Operation mode change initiated
+ * @IDT_SWPORTxSTS_OMCC:	Operation mode change completed
+ * @IDT_SWPORTxSTS_LINKUP:	Link up status
+ * @IDT_SWPORTxSTS_DS:		Port lanes behave as downstream lanes
+ * @IDT_SWPORTxSTS_MODE_MASK:	Port mode field mask
+ * @IDT_SWPORTxSTS_MODE_FLD:	Port mode field offset
+ * @IDT_SWPORTxSTS_MODE_DIS:	Port mode - disabled
+ * @IDT_SWPORTxSTS_MODE_DS:	Port mode - downstream switch port
+ * @IDT_SWPORTxSTS_MODE_US:	Port mode - upstream switch port
+ * @IDT_SWPORTxSTS_MODE_NT:	Port mode - NT function
+ * @IDT_SWPORTxSTS_MODE_USNT:	Port mode - upstream switch port with NTB
+ * @IDT_SWPORTxSTS_MODE_UNAT:	Port mode - unattached
+ * @IDT_SWPORTxSTS_MODE_USDMA:	Port mode - upstream switch port with DMA
+ * @IDT_SWPORTxSTS_MODE_USNTDMA:Port mode - upstream port with NTB and DMA
+ * @IDT_SWPORTxSTS_MODE_NTDMA:	Port mode - NT function with DMA
+ * @IDT_SWPORTxSTS_SWPART_MASK:	Port partition field mask
+ * @IDT_SWPORTxSTS_SWPART_FLD:	Port partition field offset
+ * @IDT_SWPORTxSTS_DEVNUM_MASK:	Port device number field mask
+ * @IDT_SWPORTxSTS_DEVNUM_FLD:	Port device number field offset
+ */
+#define IDT_SWPORTxSTS_OMCI		0x00000001U
+#define IDT_SWPORTxSTS_OMCC		0x00000002U
+#define IDT_SWPORTxSTS_LINKUP		0x00000010U
+#define IDT_SWPORTxSTS_DS		0x00000020U
+#define IDT_SWPORTxSTS_MODE_MASK	0x000003C0U
+#define IDT_SWPORTxSTS_MODE_FLD		6
+#define IDT_SWPORTxSTS_MODE_DIS		0x00000000U
+#define IDT_SWPORTxSTS_MODE_DS		0x00000040U
+#define IDT_SWPORTxSTS_MODE_US		0x00000080U
+#define IDT_SWPORTxSTS_MODE_NT		0x000000C0U
+#define IDT_SWPORTxSTS_MODE_USNT	0x00000100U
+#define IDT_SWPORTxSTS_MODE_UNAT	0x00000140U
+#define IDT_SWPORTxSTS_MODE_USDMA	0x00000180U
+#define IDT_SWPORTxSTS_MODE_USNTDMA	0x000001C0U
+#define IDT_SWPORTxSTS_MODE_NTDMA	0x00000200U
+#define IDT_SWPORTxSTS_SWPART_MASK	0x00001C00U
+#define IDT_SWPORTxSTS_SWPART_FLD	10
+#define IDT_SWPORTxSTS_DEVNUM_MASK	0x001F0000U
+#define IDT_SWPORTxSTS_DEVNUM_FLD	16
+
+/*
+ * SEMSK register fields related constants
+ * @IDT_SEMSK_LINKUP:	Link Up event mask bit
+ * @IDT_SEMSK_LINKDN:	Link Down event mask bit
+ * @IDT_SEMSK_GSIGNAL:	Global Signal event mask bit
+ */
+#define IDT_SEMSK_LINKUP		0x00000001U
+#define IDT_SEMSK_LINKDN		0x00000002U
+#define IDT_SEMSK_GSIGNAL		0x00000020U
+
+/*
+ * SWPxMSGCTL register fields related constants
+ * @IDT_SWPxMSGCTL_REG_MASK:	Register select field mask
+ * @IDT_SWPxMSGCTL_REG_FLD:	Register select field offset
+ * @IDT_SWPxMSGCTL_PART_MASK:	Partition select field mask
+ * @IDT_SWPxMSGCTL_PART_FLD:	Partition select field offset
+ */
+#define IDT_SWPxMSGCTL_REG_MASK		0x00000003U
+#define IDT_SWPxMSGCTL_REG_FLD		0
+#define IDT_SWPxMSGCTL_PART_MASK	0x00000070U
+#define IDT_SWPxMSGCTL_PART_FLD		4
+
+/*
+ * TMPSTS register fields related constants
+ * @IDT_TMPSTS_TEMP_MASK:	Current temperature field mask
+ * @IDT_TMPSTS_TEMP_FLD:	Current temperature field offset
+ */
+#define IDT_TMPSTS_TEMP_MASK		0x000000FFU
+#define IDT_TMPSTS_TEMP_FLD		0
+
+/*
+ * Helper macros to get/set the corresponding field value
+ * @GET_FIELD:		Retrieve the value of the corresponding field
+ * @SET_FIELD:		Yield data with the field cleared and value shifted into it
+ * @IS_FLD_SET:		Check whether the field equals IDT_<field>_<value>
+ */
+#define GET_FIELD(field, data) \
+	(((u32)(data) & IDT_ ##field## _MASK) >> IDT_ ##field## _FLD)
+#define SET_FIELD(field, data, value) \
+	(((u32)(data) & ~IDT_ ##field## _MASK) | \
+	 ((u32)(value) << IDT_ ##field## _FLD))
+#define IS_FLD_SET(field, data, value) \
+	(((u32)(data) & IDT_ ##field## _MASK) == IDT_ ##field## _ ##value)
+
+/*
+ * Useful registers masks:
+ * @IDT_DBELL_MASK:	Doorbell bits mask
+ * @IDT_OUTMSG_MASK:	Out messages status bits mask
+ * @IDT_INMSG_MASK:	In messages status bits mask
+ * @IDT_MSG_MASK:	Any message status bits mask
+ */
+#define IDT_DBELL_MASK		((u32)0xFFFFFFFFU)
+#define IDT_OUTMSG_MASK		((u32)0x0000000FU)
+#define IDT_INMSG_MASK		((u32)0x000F0000U)
+#define IDT_MSG_MASK		(IDT_INMSG_MASK | IDT_OUTMSG_MASK)
+
+/*
+ * Number of IDT NTB resources:
+ * @IDT_MSG_CNT:	Number of Message registers
+ * @IDT_BAR_CNT:	Number of BARs of each port
+ * @IDT_MTBL_ENTRY_CNT:	Number of mapping table entries
+ */
+#define IDT_MSG_CNT		4
+#define IDT_BAR_CNT		6
+#define IDT_MTBL_ENTRY_CNT	64
+
+/*
+ * General IDT PCIe-switch constants
+ * @IDT_MAX_NR_PORTS:	Maximum number of ports per IDT PCIe-switch
+ * @IDT_MAX_NR_PARTS:	Maximum number of partitions per IDT PCIe-switch
+ * @IDT_MAX_NR_PEERS:	Maximum number of NT-peers per IDT PCIe-switch
+ * @IDT_MAX_NR_MWS:	Maximum number of Memory Windows
+ * @IDT_PCIE_REGSIZE:	Size of the registers in bytes
+ * @IDT_TRANS_ALIGN:	Alignment of translated base address
+ * @IDT_DIR_SIZE_ALIGN:	Alignment of size setting for direct translated MWs.
+ *			Even though the lower 10 bits are reserved, they are
+ *			treated by IDT as ones, so effectively there is no
+ *			size alignment restriction for DIR address translation.
+ */
+#define IDT_MAX_NR_PORTS	24
+#define IDT_MAX_NR_PARTS	8
+#define IDT_MAX_NR_PEERS	8
+#define IDT_MAX_NR_MWS		29
+#define IDT_PCIE_REGSIZE	4
+#define IDT_TRANS_ALIGN		4
+#define IDT_DIR_SIZE_ALIGN	1
+
+/*
+ * IDT Memory Windows type. Depending on the device settings, IDT supports
+ * Direct Address Translation MW registers and Lookup Table registers
+ * @IDT_MW_DIR:		Direct address translation
+ * @IDT_MW_LUT12:	12-entry lookup table entry
+ * @IDT_MW_LUT24:	24-entry lookup table entry
+ *
+ * NOTE These values are exactly the same as those of the BARSETUP ATRAN field
+ */
+enum idt_mw_type {
+	IDT_MW_DIR = 0x0,
+	IDT_MW_LUT12 = 0x1,
+	IDT_MW_LUT24 = 0x2
+};
+
+/*
+ * IDT PCIe-switch model private data
+ * @name:	Device name
+ * @port_cnt:	Total number of NT endpoint ports
+ * @ports:	Port ids
+ */
+struct idt_89hpes_cfg {
+	char *name;
+	unsigned char port_cnt;
+	unsigned char ports[];
+};
+
+/*
+ * Memory window configuration structure
+ * @type:	Type of the memory window (direct address translation or lookup
+ *		table)
+ *
+ * @bar:	PCIe BAR the memory window belongs to
+ * @idx:	Index of the memory window within the BAR
+ *
+ * @addr_align:	Alignment of translated address
+ * @size_align:	Alignment of memory window size
+ * @size_max:	Maximum size of memory window
+ */
+struct idt_mw_cfg {
+	enum idt_mw_type type;
+
+	unsigned char bar;
+	unsigned char idx;
+
+	u64 addr_align;
+	u64 size_align;
+	u64 size_max;
+};
+
+/*
+ * Description structure of peer IDT NT-functions:
+ * @port:		NT-function port
+ * @part:		NT-function partition
+ *
+ * @mw_cnt:		Number of memory windows supported by NT-function
+ * @mws:		Array of memory windows descriptors
+ */
+struct idt_ntb_peer {
+	unsigned char port;
+	unsigned char part;
+
+	unsigned char mw_cnt;
+	struct idt_mw_cfg *mws;
+};
+
+/*
+ * Description structure of local IDT NT-function:
+ * @ntb:		Linux NTB-device description structure
+ * @swcfg:		Pointer to the structure of local IDT PCIe-switch
+ *			specific configurations
+ *
+ * @port:		Local NT-function port
+ * @part:		Local NT-function partition
+ *
+ * @peer_cnt:		Number of peers with activated NTB-function
+ * @peers:		Array of peer descriptor structures
+ * @port_idx_map:	Map of port number -> peer index
+ * @part_idx_map:	Map of partition number -> peer index
+ *
+ * @mtbl_lock:		Mapping table access lock
+ *
+ * @mw_cnt:		Number of memory windows supported by NT-function
+ * @mws:		Array of memory windows descriptors
+ * @lut_lock:		Lookup table access lock
+ *
+ * @msg_locks:		Message registers mapping table locks
+ *
+ * @cfgspc:		Virtual address of the memory mapped configuration
+ *			space of the NT-function
+ * @db_mask_lock:	Doorbell mask register lock
+ * @msg_mask_lock:	Message mask register lock
+ * @gasa_lock:		GASA registers access lock
+ *
+ * @dbgfs_info:		DebugFS info node
+ */
+struct idt_ntb_dev {
+	struct ntb_dev ntb;
+	struct idt_89hpes_cfg *swcfg;
+
+	unsigned char port;
+	unsigned char part;
+
+	unsigned char peer_cnt;
+	struct idt_ntb_peer peers[IDT_MAX_NR_PEERS];
+	char port_idx_map[IDT_MAX_NR_PORTS];
+	char part_idx_map[IDT_MAX_NR_PARTS];
+
+	spinlock_t mtbl_lock;
+
+	unsigned char mw_cnt;
+	struct idt_mw_cfg *mws;
+	spinlock_t lut_lock;
+
+	spinlock_t msg_locks[IDT_MSG_CNT];
+
+	void __iomem *cfgspc;
+	spinlock_t db_mask_lock;
+	spinlock_t msg_mask_lock;
+	spinlock_t gasa_lock;
+
+	struct dentry *dbgfs_info;
+};
+#define to_ndev_ntb(__ntb) container_of(__ntb, struct idt_ntb_dev, ntb)
+
+/*
+ * Descriptor of the IDT PCIe-switch BAR resources
+ * @setup:	BAR setup register
+ * @limit:	BAR limit register
+ * @ltbase:	Lower translated base address
+ * @utbase:	Upper translated base address
+ */
+struct idt_ntb_bar {
+	unsigned int setup;
+	unsigned int limit;
+	unsigned int ltbase;
+	unsigned int utbase;
+};
+
+/*
+ * Descriptor of the IDT PCIe-switch message resources
+ * @in:		Inbound message register
+ * @out:	Outbound message register
+ * @src:	Source of inbound message register
+ */
+struct idt_ntb_msg {
+	unsigned int in;
+	unsigned int out;
+	unsigned int src;
+};
+
+/*
+ * Descriptor of the IDT PCIe-switch NT-function specific parameters in the
+ * PCI Configuration Space
+ * @bars:	BARs related registers
+ * @msgs:	Messaging related registers
+ */
+struct idt_ntb_regs {
+	struct idt_ntb_bar bars[IDT_BAR_CNT];
+	struct idt_ntb_msg msgs[IDT_MSG_CNT];
+};
+
+/*
+ * Descriptor of the IDT PCIe-switch port specific parameters in the
+ * Global Configuration Space
+ * @pcicmdsts:	 PCI command/status register
+ * @pcielctlsts: PCIe link control/status
+ *
+ * @ctl:	Port control register
+ * @sts:	Port status register
+ *
+ * @bars:	BARs related registers
+ */
+struct idt_ntb_port {
+	unsigned int pcicmdsts;
+	unsigned int pcielctlsts;
+	unsigned int ntctl;
+
+	unsigned int ctl;
+	unsigned int sts;
+
+	struct idt_ntb_bar bars[IDT_BAR_CNT];
+};
+
+/*
+ * Descriptor of the IDT PCIe-switch partition specific parameters.
+ * @ctl:	Partition control register in the Global Address Space
+ * @sts:	Partition status register in the Global Address Space
+ * @msgctl:	Messages control registers
+ */
+struct idt_ntb_part {
+	unsigned int ctl;
+	unsigned int sts;
+	unsigned int msgctl[IDT_MSG_CNT];
+};
+
+#endif /* NTB_HW_IDT_H */

diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c
index 7b3b6fd..2557e2c 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.c
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.c

@@ -6,6 +6,7 @@
  *
  *   Copyright(c) 2012 Intel Corporation. All rights reserved.
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of version 2 of the GNU General Public License as
@@ -15,6 +16,7 @@
  *
  *   Copyright(c) 2012 Intel Corporation. All rights reserved.
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -270,12 +272,12 @@ static inline int ndev_db_addr(struct intel_ntb_dev *ndev,
 
 	if (db_addr) {
 		*db_addr = reg_addr + reg;
-		dev_dbg(ndev_dev(ndev), "Peer db addr %llx\n", *db_addr);
+		dev_dbg(&ndev->ntb.pdev->dev, "Peer db addr %llx\n", *db_addr);
 	}
 
 	if (db_size) {
 		*db_size = ndev->reg->db_size;
-		dev_dbg(ndev_dev(ndev), "Peer db size %llx\n", *db_size);
+		dev_dbg(&ndev->ntb.pdev->dev, "Peer db size %llx\n", *db_size);
 	}
 
 	return 0;
@@ -368,7 +370,8 @@ static inline int ndev_spad_addr(struct intel_ntb_dev *ndev, int idx,
 
 	if (spad_addr) {
 		*spad_addr = reg_addr + reg + (idx << 2);
-		dev_dbg(ndev_dev(ndev), "Peer spad addr %llx\n", *spad_addr);
+		dev_dbg(&ndev->ntb.pdev->dev, "Peer spad addr %llx\n",
+			*spad_addr);
 	}
 
 	return 0;
@@ -409,7 +412,7 @@ static irqreturn_t ndev_interrupt(struct intel_ntb_dev *ndev, int vec)
 	if ((ndev->hwerr_flags & NTB_HWERR_MSIX_VECTOR32_BAD) && (vec == 31))
 		vec_mask |= ndev->db_link_mask;
 
-	dev_dbg(ndev_dev(ndev), "vec %d vec_mask %llx\n", vec, vec_mask);
+	dev_dbg(&ndev->ntb.pdev->dev, "vec %d vec_mask %llx\n", vec, vec_mask);
 
 	ndev->last_ts = jiffies;
 
@@ -428,7 +431,7 @@ static irqreturn_t ndev_vec_isr(int irq, void *dev)
 {
 	struct intel_ntb_vec *nvec = dev;
 
-	dev_dbg(ndev_dev(nvec->ndev), "irq: %d  nvec->num: %d\n",
+	dev_dbg(&nvec->ndev->ntb.pdev->dev, "irq: %d  nvec->num: %d\n",
 		irq, nvec->num);
 
 	return ndev_interrupt(nvec->ndev, nvec->num);
@@ -438,7 +441,7 @@ static irqreturn_t ndev_irq_isr(int irq, void *dev)
 {
 	struct intel_ntb_dev *ndev = dev;
 
-	return ndev_interrupt(ndev, irq - ndev_pdev(ndev)->irq);
+	return ndev_interrupt(ndev, irq - ndev->ntb.pdev->irq);
 }
 
 static int ndev_init_isr(struct intel_ntb_dev *ndev,
@@ -448,7 +451,7 @@ static int ndev_init_isr(struct intel_ntb_dev *ndev,
 	struct pci_dev *pdev;
 	int rc, i, msix_count, node;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 
 	node = dev_to_node(&pdev->dev);
 
@@ -487,7 +490,7 @@ static int ndev_init_isr(struct intel_ntb_dev *ndev,
 			goto err_msix_request;
 	}
 
-	dev_dbg(ndev_dev(ndev), "Using %d msix interrupts\n", msix_count);
+	dev_dbg(&pdev->dev, "Using %d msix interrupts\n", msix_count);
 	ndev->db_vec_count = msix_count;
 	ndev->db_vec_shift = msix_shift;
 	return 0;
@@ -515,7 +518,7 @@ static int ndev_init_isr(struct intel_ntb_dev *ndev,
 	if (rc)
 		goto err_msi_request;
 
-	dev_dbg(ndev_dev(ndev), "Using msi interrupts\n");
+	dev_dbg(&pdev->dev, "Using msi interrupts\n");
 	ndev->db_vec_count = 1;
 	ndev->db_vec_shift = total_shift;
 	return 0;
@@ -533,7 +536,7 @@ static int ndev_init_isr(struct intel_ntb_dev *ndev,
 	if (rc)
 		goto err_intx_request;
 
-	dev_dbg(ndev_dev(ndev), "Using intx interrupts\n");
+	dev_dbg(&pdev->dev, "Using intx interrupts\n");
 	ndev->db_vec_count = 1;
 	ndev->db_vec_shift = total_shift;
 	return 0;
@@ -547,7 +550,7 @@ static void ndev_deinit_isr(struct intel_ntb_dev *ndev)
 	struct pci_dev *pdev;
 	int i;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 
 	/* Mask all doorbell interrupts */
 	ndev->db_mask = ndev->db_valid_mask;
@@ -744,7 +747,7 @@ static ssize_t ndev_ntb_debugfs_read(struct file *filp, char __user *ubuf,
 	union { u64 v64; u32 v32; u16 v16; u8 v8; } u;
 
 	ndev = filp->private_data;
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 	mmio = ndev->self_mmio;
 
 	buf_size = min(count, 0x800ul);
@@ -1019,7 +1022,8 @@ static void ndev_init_debugfs(struct intel_ntb_dev *ndev)
 		ndev->debugfs_info = NULL;
 	} else {
 		ndev->debugfs_dir =
-			debugfs_create_dir(ndev_name(ndev), debugfs_dir);
+			debugfs_create_dir(pci_name(ndev->ntb.pdev),
+					   debugfs_dir);
 		if (!ndev->debugfs_dir)
 			ndev->debugfs_info = NULL;
 		else
@@ -1035,20 +1039,26 @@ static void ndev_deinit_debugfs(struct intel_ntb_dev *ndev)
 	debugfs_remove_recursive(ndev->debugfs_dir);
 }
 
-static int intel_ntb_mw_count(struct ntb_dev *ntb)
+static int intel_ntb_mw_count(struct ntb_dev *ntb, int pidx)
 {
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	return ntb_ndev(ntb)->mw_count;
 }
 
-static int intel_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
-				  phys_addr_t *base,
-				  resource_size_t *size,
-				  resource_size_t *align,
-				  resource_size_t *align_size)
+static int intel_ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int idx,
+				  resource_size_t *addr_align,
+				  resource_size_t *size_align,
+				  resource_size_t *size_max)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+	resource_size_t bar_size, mw_size;
 	int bar;
 
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	if (idx >= ndev->b2b_idx && !ndev->b2b_off)
 		idx += 1;
 
@@ -1056,24 +1066,26 @@ static int intel_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
 	if (bar < 0)
 		return bar;
 
-	if (base)
-		*base = pci_resource_start(ndev->ntb.pdev, bar) +
-			(idx == ndev->b2b_idx ? ndev->b2b_off : 0);
+	bar_size = pci_resource_len(ndev->ntb.pdev, bar);
 
-	if (size)
-		*size = pci_resource_len(ndev->ntb.pdev, bar) -
-			(idx == ndev->b2b_idx ? ndev->b2b_off : 0);
+	if (idx == ndev->b2b_idx)
+		mw_size = bar_size - ndev->b2b_off;
+	else
+		mw_size = bar_size;
 
-	if (align)
-		*align = pci_resource_len(ndev->ntb.pdev, bar);
+	if (addr_align)
+		*addr_align = pci_resource_len(ndev->ntb.pdev, bar);
 
-	if (align_size)
-		*align_size = 1;
+	if (size_align)
+		*size_align = 1;
+
+	if (size_max)
+		*size_max = mw_size;
 
 	return 0;
 }
 
-static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
+static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 				  dma_addr_t addr, resource_size_t size)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
@@ -1083,6 +1095,9 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
 	u64 base, limit, reg_val;
 	int bar;
 
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	if (idx >= ndev->b2b_idx && !ndev->b2b_off)
 		idx += 1;
 
@@ -1171,7 +1186,7 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
 	return 0;
 }
 
-static int intel_ntb_link_is_up(struct ntb_dev *ntb,
+static u64 intel_ntb_link_is_up(struct ntb_dev *ntb,
 				enum ntb_speed *speed,
 				enum ntb_width *width)
 {
@@ -1206,13 +1221,13 @@ static int intel_ntb_link_enable(struct ntb_dev *ntb,
 	if (ndev->ntb.topo == NTB_TOPO_SEC)
 		return -EINVAL;
 
-	dev_dbg(ndev_dev(ndev),
+	dev_dbg(&ntb->pdev->dev,
 		"Enabling link with max_speed %d max_width %d\n",
 		max_speed, max_width);
 	if (max_speed != NTB_SPEED_AUTO)
-		dev_dbg(ndev_dev(ndev), "ignoring max_speed %d\n", max_speed);
+		dev_dbg(&ntb->pdev->dev, "ignoring max_speed %d\n", max_speed);
 	if (max_width != NTB_WIDTH_AUTO)
-		dev_dbg(ndev_dev(ndev), "ignoring max_width %d\n", max_width);
+		dev_dbg(&ntb->pdev->dev, "ignoring max_width %d\n", max_width);
 
 	ntb_ctl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl);
 	ntb_ctl &= ~(NTB_CTL_DISABLE | NTB_CTL_CFG_LOCK);
@@ -1235,7 +1250,7 @@ static int intel_ntb_link_disable(struct ntb_dev *ntb)
 	if (ndev->ntb.topo == NTB_TOPO_SEC)
 		return -EINVAL;
 
-	dev_dbg(ndev_dev(ndev), "Disabling link\n");
+	dev_dbg(&ntb->pdev->dev, "Disabling link\n");
 
 	/* Bring NTB link down */
 	ntb_cntl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl);
@@ -1249,6 +1264,36 @@ static int intel_ntb_link_disable(struct ntb_dev *ntb)
 	return 0;
 }
 
+static int intel_ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+	/* Inbound and outbound memory window counts match: reuse mw_count */
+	return ntb_ndev(ntb)->mw_count;
+}
+
+static int intel_ntb_peer_mw_get_addr(struct ntb_dev *ntb, int idx,
+				     phys_addr_t *base, resource_size_t *size)
+{
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+	int bar;
+
+	if (idx >= ndev->b2b_idx && !ndev->b2b_off)
+		idx += 1;	/* step over the mw wholly taken by b2b */
+
+	bar = ndev_mw_to_bar(ndev, idx);
+	if (bar < 0)
+		return bar;	/* propagate the negative result */
+
+	if (base)		/* output pointers are optional */
+		*base = pci_resource_start(ndev->ntb.pdev, bar) +
+			(idx == ndev->b2b_idx ? ndev->b2b_off : 0);
+
+	if (size)
+		*size = pci_resource_len(ndev->ntb.pdev, bar) -
+			(idx == ndev->b2b_idx ? ndev->b2b_off : 0);
+
+	return 0;
+}
+
 static int intel_ntb_db_is_unsafe(struct ntb_dev *ntb)
 {
 	return ndev_ignore_unsafe(ntb_ndev(ntb), NTB_UNSAFE_DB);
@@ -1366,30 +1411,30 @@ static int intel_ntb_spad_write(struct ntb_dev *ntb,
 			       ndev->self_reg->spad);
 }
 
-static int intel_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
+static int intel_ntb_peer_spad_addr(struct ntb_dev *ntb, int pidx, int sidx,
 				    phys_addr_t *spad_addr)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
 
-	return ndev_spad_addr(ndev, idx, spad_addr, ndev->peer_addr,
+	return ndev_spad_addr(ndev, sidx, spad_addr, ndev->peer_addr,
 			      ndev->peer_reg->spad);
 }
 
-static u32 intel_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+static u32 intel_ntb_peer_spad_read(struct ntb_dev *ntb, int pidx, int sidx)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
 
-	return ndev_spad_read(ndev, idx,
+	return ndev_spad_read(ndev, sidx,
 			      ndev->peer_mmio +
 			      ndev->peer_reg->spad);
 }
 
-static int intel_ntb_peer_spad_write(struct ntb_dev *ntb,
-				     int idx, u32 val)
+static int intel_ntb_peer_spad_write(struct ntb_dev *ntb, int pidx,
+				     int sidx, u32 val)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
 
-	return ndev_spad_write(ndev, idx, val,
+	return ndev_spad_write(ndev, sidx, val,
 			       ndev->peer_mmio +
 			       ndev->peer_reg->spad);
 }
@@ -1442,30 +1487,33 @@ static int atom_link_is_err(struct intel_ntb_dev *ndev)
 
 static inline enum ntb_topo atom_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd)
 {
+	struct device *dev = &ndev->ntb.pdev->dev;
+
 	switch (ppd & ATOM_PPD_TOPO_MASK) {
 	case ATOM_PPD_TOPO_B2B_USD:
-		dev_dbg(ndev_dev(ndev), "PPD %d B2B USD\n", ppd);
+		dev_dbg(dev, "PPD %d B2B USD\n", ppd);
 		return NTB_TOPO_B2B_USD;
 
 	case ATOM_PPD_TOPO_B2B_DSD:
-		dev_dbg(ndev_dev(ndev), "PPD %d B2B DSD\n", ppd);
+		dev_dbg(dev, "PPD %d B2B DSD\n", ppd);
 		return NTB_TOPO_B2B_DSD;
 
 	case ATOM_PPD_TOPO_PRI_USD:
 	case ATOM_PPD_TOPO_PRI_DSD: /* accept bogus PRI_DSD */
 	case ATOM_PPD_TOPO_SEC_USD:
 	case ATOM_PPD_TOPO_SEC_DSD: /* accept bogus SEC_DSD */
-		dev_dbg(ndev_dev(ndev), "PPD %d non B2B disabled\n", ppd);
+		dev_dbg(dev, "PPD %d non B2B disabled\n", ppd);
 		return NTB_TOPO_NONE;
 	}
 
-	dev_dbg(ndev_dev(ndev), "PPD %d invalid\n", ppd);
+	dev_dbg(dev, "PPD %d invalid\n", ppd);
 	return NTB_TOPO_NONE;
 }
 
 static void atom_link_hb(struct work_struct *work)
 {
 	struct intel_ntb_dev *ndev = hb_ndev(work);
+	struct device *dev = &ndev->ntb.pdev->dev;
 	unsigned long poll_ts;
 	void __iomem *mmio;
 	u32 status32;
@@ -1503,30 +1551,30 @@ static void atom_link_hb(struct work_struct *work)
 
 	/* Clear AER Errors, write to clear */
 	status32 = ioread32(mmio + ATOM_ERRCORSTS_OFFSET);
-	dev_dbg(ndev_dev(ndev), "ERRCORSTS = %x\n", status32);
+	dev_dbg(dev, "ERRCORSTS = %x\n", status32);
 	status32 &= PCI_ERR_COR_REP_ROLL;
 	iowrite32(status32, mmio + ATOM_ERRCORSTS_OFFSET);
 
 	/* Clear unexpected electrical idle event in LTSSM, write to clear */
 	status32 = ioread32(mmio + ATOM_LTSSMERRSTS0_OFFSET);
-	dev_dbg(ndev_dev(ndev), "LTSSMERRSTS0 = %x\n", status32);
+	dev_dbg(dev, "LTSSMERRSTS0 = %x\n", status32);
 	status32 |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI;
 	iowrite32(status32, mmio + ATOM_LTSSMERRSTS0_OFFSET);
 
 	/* Clear DeSkew Buffer error, write to clear */
 	status32 = ioread32(mmio + ATOM_DESKEWSTS_OFFSET);
-	dev_dbg(ndev_dev(ndev), "DESKEWSTS = %x\n", status32);
+	dev_dbg(dev, "DESKEWSTS = %x\n", status32);
 	status32 |= ATOM_DESKEWSTS_DBERR;
 	iowrite32(status32, mmio + ATOM_DESKEWSTS_OFFSET);
 
 	status32 = ioread32(mmio + ATOM_IBSTERRRCRVSTS0_OFFSET);
-	dev_dbg(ndev_dev(ndev), "IBSTERRRCRVSTS0 = %x\n", status32);
+	dev_dbg(dev, "IBSTERRRCRVSTS0 = %x\n", status32);
 	status32 &= ATOM_IBIST_ERR_OFLOW;
 	iowrite32(status32, mmio + ATOM_IBSTERRRCRVSTS0_OFFSET);
 
 	/* Releases the NTB state machine to allow the link to retrain */
 	status32 = ioread32(mmio + ATOM_LTSSMSTATEJMP_OFFSET);
-	dev_dbg(ndev_dev(ndev), "LTSSMSTATEJMP = %x\n", status32);
+	dev_dbg(dev, "LTSSMSTATEJMP = %x\n", status32);
 	status32 &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT;
 	iowrite32(status32, mmio + ATOM_LTSSMSTATEJMP_OFFSET);
 
@@ -1699,11 +1747,11 @@ static int skx_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	int b2b_bar;
 	u8 bar_sz;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 	mmio = ndev->self_mmio;
 
 	if (ndev->b2b_idx == UINT_MAX) {
-		dev_dbg(ndev_dev(ndev), "not using b2b mw\n");
+		dev_dbg(&pdev->dev, "not using b2b mw\n");
 		b2b_bar = 0;
 		ndev->b2b_off = 0;
 	} else {
@@ -1711,24 +1759,21 @@ static int skx_setup_b2b_mw(struct intel_ntb_dev *ndev,
 		if (b2b_bar < 0)
 			return -EIO;
 
-		dev_dbg(ndev_dev(ndev), "using b2b mw bar %d\n", b2b_bar);
+		dev_dbg(&pdev->dev, "using b2b mw bar %d\n", b2b_bar);
 
 		bar_size = pci_resource_len(ndev->ntb.pdev, b2b_bar);
 
-		dev_dbg(ndev_dev(ndev), "b2b bar size %#llx\n", bar_size);
+		dev_dbg(&pdev->dev, "b2b bar size %#llx\n", bar_size);
 
 		if (b2b_mw_share && ((bar_size >> 1) >= XEON_B2B_MIN_SIZE)) {
-			dev_dbg(ndev_dev(ndev),
-				"b2b using first half of bar\n");
+			dev_dbg(&pdev->dev, "b2b using first half of bar\n");
 			ndev->b2b_off = bar_size >> 1;
 		} else if (bar_size >= XEON_B2B_MIN_SIZE) {
-			dev_dbg(ndev_dev(ndev),
-				"b2b using whole bar\n");
+			dev_dbg(&pdev->dev, "b2b using whole bar\n");
 			ndev->b2b_off = 0;
 			--ndev->mw_count;
 		} else {
-			dev_dbg(ndev_dev(ndev),
-				"b2b bar size is too small\n");
+			dev_dbg(&pdev->dev, "b2b bar size is too small\n");
 			return -EIO;
 		}
 	}
@@ -1738,7 +1783,7 @@ static int skx_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	 * except disable or halve the size of the b2b secondary bar.
 	 */
 	pci_read_config_byte(pdev, SKX_IMBAR1SZ_OFFSET, &bar_sz);
-	dev_dbg(ndev_dev(ndev), "IMBAR1SZ %#x\n", bar_sz);
+	dev_dbg(&pdev->dev, "IMBAR1SZ %#x\n", bar_sz);
 	if (b2b_bar == 1) {
 		if (ndev->b2b_off)
 			bar_sz -= 1;
@@ -1748,10 +1793,10 @@ static int skx_setup_b2b_mw(struct intel_ntb_dev *ndev,
 
 	pci_write_config_byte(pdev, SKX_EMBAR1SZ_OFFSET, bar_sz);
 	pci_read_config_byte(pdev, SKX_EMBAR1SZ_OFFSET, &bar_sz);
-	dev_dbg(ndev_dev(ndev), "EMBAR1SZ %#x\n", bar_sz);
+	dev_dbg(&pdev->dev, "EMBAR1SZ %#x\n", bar_sz);
 
 	pci_read_config_byte(pdev, SKX_IMBAR2SZ_OFFSET, &bar_sz);
-	dev_dbg(ndev_dev(ndev), "IMBAR2SZ %#x\n", bar_sz);
+	dev_dbg(&pdev->dev, "IMBAR2SZ %#x\n", bar_sz);
 	if (b2b_bar == 2) {
 		if (ndev->b2b_off)
 			bar_sz -= 1;
@@ -1761,7 +1806,7 @@ static int skx_setup_b2b_mw(struct intel_ntb_dev *ndev,
 
 	pci_write_config_byte(pdev, SKX_EMBAR2SZ_OFFSET, bar_sz);
 	pci_read_config_byte(pdev, SKX_EMBAR2SZ_OFFSET, &bar_sz);
-	dev_dbg(ndev_dev(ndev), "EMBAR2SZ %#x\n", bar_sz);
+	dev_dbg(&pdev->dev, "EMBAR2SZ %#x\n", bar_sz);
 
 	/* SBAR01 hit by first part of the b2b bar */
 	if (b2b_bar == 0)
@@ -1777,12 +1822,12 @@ static int skx_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	bar_addr = addr->bar2_addr64 + (b2b_bar == 1 ? ndev->b2b_off : 0);
 	iowrite64(bar_addr, mmio + SKX_IMBAR1XLMT_OFFSET);
 	bar_addr = ioread64(mmio + SKX_IMBAR1XLMT_OFFSET);
-	dev_dbg(ndev_dev(ndev), "IMBAR1XLMT %#018llx\n", bar_addr);
+	dev_dbg(&pdev->dev, "IMBAR1XLMT %#018llx\n", bar_addr);
 
 	bar_addr = addr->bar4_addr64 + (b2b_bar == 2 ? ndev->b2b_off : 0);
 	iowrite64(bar_addr, mmio + SKX_IMBAR2XLMT_OFFSET);
 	bar_addr = ioread64(mmio + SKX_IMBAR2XLMT_OFFSET);
-	dev_dbg(ndev_dev(ndev), "IMBAR2XLMT %#018llx\n", bar_addr);
+	dev_dbg(&pdev->dev, "IMBAR2XLMT %#018llx\n", bar_addr);
 
 	/* zero incoming translation addrs */
 	iowrite64(0, mmio + SKX_IMBAR1XBASE_OFFSET);
@@ -1852,7 +1897,7 @@ static int skx_init_dev(struct intel_ntb_dev *ndev)
 	u8 ppd;
 	int rc;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 
 	ndev->reg = &skx_reg;
 
@@ -1861,7 +1906,7 @@ static int skx_init_dev(struct intel_ntb_dev *ndev)
 		return -EIO;
 
 	ndev->ntb.topo = xeon_ppd_topo(ndev, ppd);
-	dev_dbg(ndev_dev(ndev), "ppd %#x topo %s\n", ppd,
+	dev_dbg(&pdev->dev, "ppd %#x topo %s\n", ppd,
 		ntb_topo_string(ndev->ntb.topo));
 	if (ndev->ntb.topo == NTB_TOPO_NONE)
 		return -EINVAL;
@@ -1885,14 +1930,14 @@ static int intel_ntb3_link_enable(struct ntb_dev *ntb,
 
 	ndev = container_of(ntb, struct intel_ntb_dev, ntb);
 
-	dev_dbg(ndev_dev(ndev),
+	dev_dbg(&ntb->pdev->dev,
 		"Enabling link with max_speed %d max_width %d\n",
 		max_speed, max_width);
 
 	if (max_speed != NTB_SPEED_AUTO)
-		dev_dbg(ndev_dev(ndev), "ignoring max_speed %d\n", max_speed);
+		dev_dbg(&ntb->pdev->dev, "ignoring max_speed %d\n", max_speed);
 	if (max_width != NTB_WIDTH_AUTO)
-		dev_dbg(ndev_dev(ndev), "ignoring max_width %d\n", max_width);
+		dev_dbg(&ntb->pdev->dev, "ignoring max_width %d\n", max_width);
 
 	ntb_ctl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl);
 	ntb_ctl &= ~(NTB_CTL_DISABLE | NTB_CTL_CFG_LOCK);
@@ -1902,7 +1947,7 @@ static int intel_ntb3_link_enable(struct ntb_dev *ntb,
 
 	return 0;
 }
-static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int idx,
+static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 				   dma_addr_t addr, resource_size_t size)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
@@ -1912,6 +1957,9 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int idx,
 	u64 base, limit, reg_val;
 	int bar;
 
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
 	if (idx >= ndev->b2b_idx && !ndev->b2b_off)
 		idx += 1;
 
@@ -1953,7 +2001,7 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int idx,
 		return -EIO;
 	}
 
-	dev_dbg(ndev_dev(ndev), "BAR %d IMBARXBASE: %#Lx\n", bar, reg_val);
+	dev_dbg(&ntb->pdev->dev, "BAR %d IMBARXBASE: %#Lx\n", bar, reg_val);
 
 	/* set and verify setting the limit */
 	iowrite64(limit, mmio + limit_reg);
@@ -1964,7 +2012,7 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int idx,
 		return -EIO;
 	}
 
-	dev_dbg(ndev_dev(ndev), "BAR %d IMBARXLMT: %#Lx\n", bar, reg_val);
+	dev_dbg(&ntb->pdev->dev, "BAR %d IMBARXLMT: %#Lx\n", bar, reg_val);
 
 	/* setup the EP */
 	limit_reg = ndev->xlat_reg->bar2_limit + (idx * 0x10) + 0x4000;
@@ -1985,7 +2033,7 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int idx,
 		return -EIO;
 	}
 
-	dev_dbg(ndev_dev(ndev), "BAR %d EMBARXLMT: %#Lx\n", bar, reg_val);
+	dev_dbg(&ntb->pdev->dev, "BAR %d EMBARXLMT: %#Lx\n", bar, reg_val);
 
 	return 0;
 }
@@ -2092,7 +2140,7 @@ static inline enum ntb_topo xeon_ppd_topo(struct intel_ntb_dev *ndev, u8 ppd)
 static inline int xeon_ppd_bar4_split(struct intel_ntb_dev *ndev, u8 ppd)
 {
 	if (ppd & XEON_PPD_SPLIT_BAR_MASK) {
-		dev_dbg(ndev_dev(ndev), "PPD %d split bar\n", ppd);
+		dev_dbg(&ndev->ntb.pdev->dev, "PPD %d split bar\n", ppd);
 		return 1;
 	}
 	return 0;
@@ -2122,11 +2170,11 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	int b2b_bar;
 	u8 bar_sz;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 	mmio = ndev->self_mmio;
 
 	if (ndev->b2b_idx == UINT_MAX) {
-		dev_dbg(ndev_dev(ndev), "not using b2b mw\n");
+		dev_dbg(&pdev->dev, "not using b2b mw\n");
 		b2b_bar = 0;
 		ndev->b2b_off = 0;
 	} else {
@@ -2134,24 +2182,21 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 		if (b2b_bar < 0)
 			return -EIO;
 
-		dev_dbg(ndev_dev(ndev), "using b2b mw bar %d\n", b2b_bar);
+		dev_dbg(&pdev->dev, "using b2b mw bar %d\n", b2b_bar);
 
 		bar_size = pci_resource_len(ndev->ntb.pdev, b2b_bar);
 
-		dev_dbg(ndev_dev(ndev), "b2b bar size %#llx\n", bar_size);
+		dev_dbg(&pdev->dev, "b2b bar size %#llx\n", bar_size);
 
 		if (b2b_mw_share && XEON_B2B_MIN_SIZE <= bar_size >> 1) {
-			dev_dbg(ndev_dev(ndev),
-				"b2b using first half of bar\n");
+			dev_dbg(&pdev->dev, "b2b using first half of bar\n");
 			ndev->b2b_off = bar_size >> 1;
 		} else if (XEON_B2B_MIN_SIZE <= bar_size) {
-			dev_dbg(ndev_dev(ndev),
-				"b2b using whole bar\n");
+			dev_dbg(&pdev->dev, "b2b using whole bar\n");
 			ndev->b2b_off = 0;
 			--ndev->mw_count;
 		} else {
-			dev_dbg(ndev_dev(ndev),
-				"b2b bar size is too small\n");
+			dev_dbg(&pdev->dev, "b2b bar size is too small\n");
 			return -EIO;
 		}
 	}
@@ -2163,7 +2208,7 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	 * offsets are not in a consistent order (bar5sz comes after ppd, odd).
 	 */
 	pci_read_config_byte(pdev, XEON_PBAR23SZ_OFFSET, &bar_sz);
-	dev_dbg(ndev_dev(ndev), "PBAR23SZ %#x\n", bar_sz);
+	dev_dbg(&pdev->dev, "PBAR23SZ %#x\n", bar_sz);
 	if (b2b_bar == 2) {
 		if (ndev->b2b_off)
 			bar_sz -= 1;
@@ -2172,11 +2217,11 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	}
 	pci_write_config_byte(pdev, XEON_SBAR23SZ_OFFSET, bar_sz);
 	pci_read_config_byte(pdev, XEON_SBAR23SZ_OFFSET, &bar_sz);
-	dev_dbg(ndev_dev(ndev), "SBAR23SZ %#x\n", bar_sz);
+	dev_dbg(&pdev->dev, "SBAR23SZ %#x\n", bar_sz);
 
 	if (!ndev->bar4_split) {
 		pci_read_config_byte(pdev, XEON_PBAR45SZ_OFFSET, &bar_sz);
-		dev_dbg(ndev_dev(ndev), "PBAR45SZ %#x\n", bar_sz);
+		dev_dbg(&pdev->dev, "PBAR45SZ %#x\n", bar_sz);
 		if (b2b_bar == 4) {
 			if (ndev->b2b_off)
 				bar_sz -= 1;
@@ -2185,10 +2230,10 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 		}
 		pci_write_config_byte(pdev, XEON_SBAR45SZ_OFFSET, bar_sz);
 		pci_read_config_byte(pdev, XEON_SBAR45SZ_OFFSET, &bar_sz);
-		dev_dbg(ndev_dev(ndev), "SBAR45SZ %#x\n", bar_sz);
+		dev_dbg(&pdev->dev, "SBAR45SZ %#x\n", bar_sz);
 	} else {
 		pci_read_config_byte(pdev, XEON_PBAR4SZ_OFFSET, &bar_sz);
-		dev_dbg(ndev_dev(ndev), "PBAR4SZ %#x\n", bar_sz);
+		dev_dbg(&pdev->dev, "PBAR4SZ %#x\n", bar_sz);
 		if (b2b_bar == 4) {
 			if (ndev->b2b_off)
 				bar_sz -= 1;
@@ -2197,10 +2242,10 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 		}
 		pci_write_config_byte(pdev, XEON_SBAR4SZ_OFFSET, bar_sz);
 		pci_read_config_byte(pdev, XEON_SBAR4SZ_OFFSET, &bar_sz);
-		dev_dbg(ndev_dev(ndev), "SBAR4SZ %#x\n", bar_sz);
+		dev_dbg(&pdev->dev, "SBAR4SZ %#x\n", bar_sz);
 
 		pci_read_config_byte(pdev, XEON_PBAR5SZ_OFFSET, &bar_sz);
-		dev_dbg(ndev_dev(ndev), "PBAR5SZ %#x\n", bar_sz);
+		dev_dbg(&pdev->dev, "PBAR5SZ %#x\n", bar_sz);
 		if (b2b_bar == 5) {
 			if (ndev->b2b_off)
 				bar_sz -= 1;
@@ -2209,7 +2254,7 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 		}
 		pci_write_config_byte(pdev, XEON_SBAR5SZ_OFFSET, bar_sz);
 		pci_read_config_byte(pdev, XEON_SBAR5SZ_OFFSET, &bar_sz);
-		dev_dbg(ndev_dev(ndev), "SBAR5SZ %#x\n", bar_sz);
+		dev_dbg(&pdev->dev, "SBAR5SZ %#x\n", bar_sz);
 	}
 
 	/* SBAR01 hit by first part of the b2b bar */
@@ -2226,7 +2271,7 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	else
 		return -EIO;
 
-	dev_dbg(ndev_dev(ndev), "SBAR01 %#018llx\n", bar_addr);
+	dev_dbg(&pdev->dev, "SBAR01 %#018llx\n", bar_addr);
 	iowrite64(bar_addr, mmio + XEON_SBAR0BASE_OFFSET);
 
 	/* Other SBAR are normally hit by the PBAR xlat, except for b2b bar.
@@ -2237,26 +2282,26 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	bar_addr = addr->bar2_addr64 + (b2b_bar == 2 ? ndev->b2b_off : 0);
 	iowrite64(bar_addr, mmio + XEON_SBAR23BASE_OFFSET);
 	bar_addr = ioread64(mmio + XEON_SBAR23BASE_OFFSET);
-	dev_dbg(ndev_dev(ndev), "SBAR23 %#018llx\n", bar_addr);
+	dev_dbg(&pdev->dev, "SBAR23 %#018llx\n", bar_addr);
 
 	if (!ndev->bar4_split) {
 		bar_addr = addr->bar4_addr64 +
 			(b2b_bar == 4 ? ndev->b2b_off : 0);
 		iowrite64(bar_addr, mmio + XEON_SBAR45BASE_OFFSET);
 		bar_addr = ioread64(mmio + XEON_SBAR45BASE_OFFSET);
-		dev_dbg(ndev_dev(ndev), "SBAR45 %#018llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "SBAR45 %#018llx\n", bar_addr);
 	} else {
 		bar_addr = addr->bar4_addr32 +
 			(b2b_bar == 4 ? ndev->b2b_off : 0);
 		iowrite32(bar_addr, mmio + XEON_SBAR4BASE_OFFSET);
 		bar_addr = ioread32(mmio + XEON_SBAR4BASE_OFFSET);
-		dev_dbg(ndev_dev(ndev), "SBAR4 %#010llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "SBAR4 %#010llx\n", bar_addr);
 
 		bar_addr = addr->bar5_addr32 +
 			(b2b_bar == 5 ? ndev->b2b_off : 0);
 		iowrite32(bar_addr, mmio + XEON_SBAR5BASE_OFFSET);
 		bar_addr = ioread32(mmio + XEON_SBAR5BASE_OFFSET);
-		dev_dbg(ndev_dev(ndev), "SBAR5 %#010llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "SBAR5 %#010llx\n", bar_addr);
 	}
 
 	/* setup incoming bar limits == base addrs (zero length windows) */
@@ -2264,26 +2309,26 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	bar_addr = addr->bar2_addr64 + (b2b_bar == 2 ? ndev->b2b_off : 0);
 	iowrite64(bar_addr, mmio + XEON_SBAR23LMT_OFFSET);
 	bar_addr = ioread64(mmio + XEON_SBAR23LMT_OFFSET);
-	dev_dbg(ndev_dev(ndev), "SBAR23LMT %#018llx\n", bar_addr);
+	dev_dbg(&pdev->dev, "SBAR23LMT %#018llx\n", bar_addr);
 
 	if (!ndev->bar4_split) {
 		bar_addr = addr->bar4_addr64 +
 			(b2b_bar == 4 ? ndev->b2b_off : 0);
 		iowrite64(bar_addr, mmio + XEON_SBAR45LMT_OFFSET);
 		bar_addr = ioread64(mmio + XEON_SBAR45LMT_OFFSET);
-		dev_dbg(ndev_dev(ndev), "SBAR45LMT %#018llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "SBAR45LMT %#018llx\n", bar_addr);
 	} else {
 		bar_addr = addr->bar4_addr32 +
 			(b2b_bar == 4 ? ndev->b2b_off : 0);
 		iowrite32(bar_addr, mmio + XEON_SBAR4LMT_OFFSET);
 		bar_addr = ioread32(mmio + XEON_SBAR4LMT_OFFSET);
-		dev_dbg(ndev_dev(ndev), "SBAR4LMT %#010llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "SBAR4LMT %#010llx\n", bar_addr);
 
 		bar_addr = addr->bar5_addr32 +
 			(b2b_bar == 5 ? ndev->b2b_off : 0);
 		iowrite32(bar_addr, mmio + XEON_SBAR5LMT_OFFSET);
 		bar_addr = ioread32(mmio + XEON_SBAR5LMT_OFFSET);
-		dev_dbg(ndev_dev(ndev), "SBAR5LMT %#05llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "SBAR5LMT %#05llx\n", bar_addr);
 	}
 
 	/* zero incoming translation addrs */
@@ -2309,23 +2354,23 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 	bar_addr = peer_addr->bar2_addr64;
 	iowrite64(bar_addr, mmio + XEON_PBAR23XLAT_OFFSET);
 	bar_addr = ioread64(mmio + XEON_PBAR23XLAT_OFFSET);
-	dev_dbg(ndev_dev(ndev), "PBAR23XLAT %#018llx\n", bar_addr);
+	dev_dbg(&pdev->dev, "PBAR23XLAT %#018llx\n", bar_addr);
 
 	if (!ndev->bar4_split) {
 		bar_addr = peer_addr->bar4_addr64;
 		iowrite64(bar_addr, mmio + XEON_PBAR45XLAT_OFFSET);
 		bar_addr = ioread64(mmio + XEON_PBAR45XLAT_OFFSET);
-		dev_dbg(ndev_dev(ndev), "PBAR45XLAT %#018llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "PBAR45XLAT %#018llx\n", bar_addr);
 	} else {
 		bar_addr = peer_addr->bar4_addr32;
 		iowrite32(bar_addr, mmio + XEON_PBAR4XLAT_OFFSET);
 		bar_addr = ioread32(mmio + XEON_PBAR4XLAT_OFFSET);
-		dev_dbg(ndev_dev(ndev), "PBAR4XLAT %#010llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "PBAR4XLAT %#010llx\n", bar_addr);
 
 		bar_addr = peer_addr->bar5_addr32;
 		iowrite32(bar_addr, mmio + XEON_PBAR5XLAT_OFFSET);
 		bar_addr = ioread32(mmio + XEON_PBAR5XLAT_OFFSET);
-		dev_dbg(ndev_dev(ndev), "PBAR5XLAT %#010llx\n", bar_addr);
+		dev_dbg(&pdev->dev, "PBAR5XLAT %#010llx\n", bar_addr);
 	}
 
 	/* set the translation offset for b2b registers */
@@ -2343,7 +2388,7 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 		return -EIO;
 
 	/* B2B_XLAT_OFFSET is 64bit, but can only take 32bit writes */
-	dev_dbg(ndev_dev(ndev), "B2BXLAT %#018llx\n", bar_addr);
+	dev_dbg(&pdev->dev, "B2BXLAT %#018llx\n", bar_addr);
 	iowrite32(bar_addr, mmio + XEON_B2B_XLAT_OFFSETL);
 	iowrite32(bar_addr >> 32, mmio + XEON_B2B_XLAT_OFFSETU);
 
@@ -2362,6 +2407,7 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
 
 static int xeon_init_ntb(struct intel_ntb_dev *ndev)
 {
+	struct device *dev = &ndev->ntb.pdev->dev;
 	int rc;
 	u32 ntb_ctl;
 
@@ -2377,7 +2423,7 @@ static int xeon_init_ntb(struct intel_ntb_dev *ndev)
 	switch (ndev->ntb.topo) {
 	case NTB_TOPO_PRI:
 		if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP) {
-			dev_err(ndev_dev(ndev), "NTB Primary config disabled\n");
+			dev_err(dev, "NTB Primary config disabled\n");
 			return -EINVAL;
 		}
 
@@ -2395,7 +2441,7 @@ static int xeon_init_ntb(struct intel_ntb_dev *ndev)
 
 	case NTB_TOPO_SEC:
 		if (ndev->hwerr_flags & NTB_HWERR_SDOORBELL_LOCKUP) {
-			dev_err(ndev_dev(ndev), "NTB Secondary config disabled\n");
+			dev_err(dev, "NTB Secondary config disabled\n");
 			return -EINVAL;
 		}
 		/* use half the spads for the peer */
@@ -2420,18 +2466,17 @@ static int xeon_init_ntb(struct intel_ntb_dev *ndev)
 				ndev->b2b_idx = b2b_mw_idx;
 
 			if (ndev->b2b_idx >= ndev->mw_count) {
-				dev_dbg(ndev_dev(ndev),
+				dev_dbg(dev,
 					"b2b_mw_idx %d invalid for mw_count %u\n",
 					b2b_mw_idx, ndev->mw_count);
 				return -EINVAL;
 			}
 
-			dev_dbg(ndev_dev(ndev),
-				"setting up b2b mw idx %d means %d\n",
+			dev_dbg(dev, "setting up b2b mw idx %d means %d\n",
 				b2b_mw_idx, ndev->b2b_idx);
 
 		} else if (ndev->hwerr_flags & NTB_HWERR_B2BDOORBELL_BIT14) {
-			dev_warn(ndev_dev(ndev), "Reduce doorbell count by 1\n");
+			dev_warn(dev, "Reduce doorbell count by 1\n");
 			ndev->db_count -= 1;
 		}
 
@@ -2472,7 +2517,7 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev)
 	u8 ppd;
 	int rc, mem;
 
-	pdev = ndev_pdev(ndev);
+	pdev = ndev->ntb.pdev;
 
 	switch (pdev->device) {
 	/* There is a Xeon hardware errata related to writes to SDOORBELL or
@@ -2548,14 +2593,14 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev)
 		return -EIO;
 
 	ndev->ntb.topo = xeon_ppd_topo(ndev, ppd);
-	dev_dbg(ndev_dev(ndev), "ppd %#x topo %s\n", ppd,
+	dev_dbg(&pdev->dev, "ppd %#x topo %s\n", ppd,
 		ntb_topo_string(ndev->ntb.topo));
 	if (ndev->ntb.topo == NTB_TOPO_NONE)
 		return -EINVAL;
 
 	if (ndev->ntb.topo != NTB_TOPO_SEC) {
 		ndev->bar4_split = xeon_ppd_bar4_split(ndev, ppd);
-		dev_dbg(ndev_dev(ndev), "ppd %#x bar4_split %d\n",
+		dev_dbg(&pdev->dev, "ppd %#x bar4_split %d\n",
 			ppd, ndev->bar4_split);
 	} else {
 		/* This is a way for transparent BAR to figure out if we are
@@ -2565,7 +2610,7 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev)
 		mem = pci_select_bars(pdev, IORESOURCE_MEM);
 		ndev->bar4_split = hweight32(mem) ==
 			HSX_SPLIT_BAR_MW_COUNT + 1;
-		dev_dbg(ndev_dev(ndev), "mem %#x bar4_split %d\n",
+		dev_dbg(&pdev->dev, "mem %#x bar4_split %d\n",
 			mem, ndev->bar4_split);
 	}
 
@@ -2602,7 +2647,7 @@ static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev)
 		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (rc)
 			goto err_dma_mask;
-		dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n");
+		dev_warn(&pdev->dev, "Cannot DMA highmem\n");
 	}
 
 	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
@@ -2610,7 +2655,7 @@ static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev)
 		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (rc)
 			goto err_dma_mask;
-		dev_warn(ndev_dev(ndev), "Cannot DMA consistent highmem\n");
+		dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n");
 	}
 
 	ndev->self_mmio = pci_iomap(pdev, 0, 0);
@@ -2636,7 +2681,7 @@ static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev)
 
 static void intel_ntb_deinit_pci(struct intel_ntb_dev *ndev)
 {
-	struct pci_dev *pdev = ndev_pdev(ndev);
+	struct pci_dev *pdev = ndev->ntb.pdev;
 
 	if (ndev->peer_mmio && ndev->peer_mmio != ndev->self_mmio)
 		pci_iounmap(pdev, ndev->peer_mmio);
@@ -2906,8 +2951,10 @@ static const struct intel_ntb_xlat_reg skx_sec_xlat = {
 /* operations for primary side of local ntb */
 static const struct ntb_dev_ops intel_ntb_ops = {
 	.mw_count		= intel_ntb_mw_count,
-	.mw_get_range		= intel_ntb_mw_get_range,
+	.mw_get_align		= intel_ntb_mw_get_align,
 	.mw_set_trans		= intel_ntb_mw_set_trans,
+	.peer_mw_count		= intel_ntb_peer_mw_count,
+	.peer_mw_get_addr	= intel_ntb_peer_mw_get_addr,
 	.link_is_up		= intel_ntb_link_is_up,
 	.link_enable		= intel_ntb_link_enable,
 	.link_disable		= intel_ntb_link_disable,
@@ -2932,8 +2979,10 @@ static const struct ntb_dev_ops intel_ntb_ops = {
 
 static const struct ntb_dev_ops intel_ntb3_ops = {
 	.mw_count		= intel_ntb_mw_count,
-	.mw_get_range		= intel_ntb_mw_get_range,
+	.mw_get_align		= intel_ntb_mw_get_align,
 	.mw_set_trans		= intel_ntb3_mw_set_trans,
+	.peer_mw_count		= intel_ntb_peer_mw_count,
+	.peer_mw_get_addr	= intel_ntb_peer_mw_get_addr,
 	.link_is_up		= intel_ntb_link_is_up,
 	.link_enable		= intel_ntb3_link_enable,
 	.link_disable		= intel_ntb_link_disable,
@@ -3008,4 +3057,3 @@ static void __exit intel_ntb_pci_driver_exit(void)
 	debugfs_remove_recursive(debugfs_dir);
 }
 module_exit(intel_ntb_pci_driver_exit);
-

diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h
index f2cf8a7..2d6c38a 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.h
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.h

@@ -382,9 +382,6 @@ struct intel_ntb_dev {
 	struct dentry			*debugfs_info;
 };
 
-#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
-#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
-#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
 #define ntb_ndev(__ntb) container_of(__ntb, struct intel_ntb_dev, ntb)
 #define hb_ndev(__work) container_of(__work, struct intel_ntb_dev, \
 				     hb_timer.work)

diff --git a/drivers/ntb/ntb.c b/drivers/ntb/ntb.c
index 2e25307..03b80d8 100644
--- a/drivers/ntb/ntb.c
+++ b/drivers/ntb/ntb.c

@@ -5,6 +5,7 @@
  *   GPL LICENSE SUMMARY
  *
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of version 2 of the GNU General Public License as
@@ -18,6 +19,7 @@
  *   BSD LICENSE
  *
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -191,6 +193,73 @@ void ntb_db_event(struct ntb_dev *ntb, int vector)
 }
 EXPORT_SYMBOL(ntb_db_event);
 
+void ntb_msg_event(struct ntb_dev *ntb)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ntb->ctx_lock, irqflags);
+	{
+		if (ntb->ctx_ops && ntb->ctx_ops->msg_event)
+			ntb->ctx_ops->msg_event(ntb->ctx);
+	}
+	spin_unlock_irqrestore(&ntb->ctx_lock, irqflags);
+}
+EXPORT_SYMBOL(ntb_msg_event);
+
+int ntb_default_port_number(struct ntb_dev *ntb)
+{
+	switch (ntb->topo) {
+	case NTB_TOPO_PRI:
+	case NTB_TOPO_B2B_USD:
+		return NTB_PORT_PRI_USD;
+	case NTB_TOPO_SEC:
+	case NTB_TOPO_B2B_DSD:
+		return NTB_PORT_SEC_DSD;
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ntb_default_port_number);
+
+int ntb_default_peer_port_count(struct ntb_dev *ntb)
+{
+	return NTB_DEF_PEER_CNT;
+}
+EXPORT_SYMBOL(ntb_default_peer_port_count);
+
+int ntb_default_peer_port_number(struct ntb_dev *ntb, int pidx)
+{
+	if (pidx != NTB_DEF_PEER_IDX)
+		return -EINVAL;
+
+	switch (ntb->topo) {
+	case NTB_TOPO_PRI:
+	case NTB_TOPO_B2B_USD:
+		return NTB_PORT_SEC_DSD;
+	case NTB_TOPO_SEC:
+	case NTB_TOPO_B2B_DSD:
+		return NTB_PORT_PRI_USD;
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ntb_default_peer_port_number);
+
+int ntb_default_peer_port_idx(struct ntb_dev *ntb, int port)
+{
+	int peer_port = ntb_default_peer_port_number(ntb, NTB_DEF_PEER_IDX);
+
+	if (peer_port == -EINVAL || port != peer_port)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL(ntb_default_peer_port_idx);
+
 static int ntb_probe(struct device *dev)
 {
 	struct ntb_dev *ntb;

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 10e5bf4..9a03c58 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c

@@ -95,6 +95,9 @@ MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy");
 
 static struct dentry *nt_debugfs_dir;
 
+/* Only two-port NTB devices are supported */
+#define PIDX		NTB_DEF_PEER_IDX
+
 struct ntb_queue_entry {
 	/* ntb_queue list reference */
 	struct list_head entry;
@@ -670,7 +673,7 @@ static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
 	if (!mw->virt_addr)
 		return;
 
-	ntb_mw_clear_trans(nt->ndev, num_mw);
+	ntb_mw_clear_trans(nt->ndev, PIDX, num_mw);
 	dma_free_coherent(&pdev->dev, mw->buff_size,
 			  mw->virt_addr, mw->dma_addr);
 	mw->xlat_size = 0;
@@ -727,7 +730,8 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
 	}
 
 	/* Notify HW the memory location of the receive buffer */
-	rc = ntb_mw_set_trans(nt->ndev, num_mw, mw->dma_addr, mw->xlat_size);
+	rc = ntb_mw_set_trans(nt->ndev, PIDX, num_mw, mw->dma_addr,
+			      mw->xlat_size);
 	if (rc) {
 		dev_err(&pdev->dev, "Unable to set mw%d translation", num_mw);
 		ntb_free_mw(nt, num_mw);
@@ -858,17 +862,17 @@ static void ntb_transport_link_work(struct work_struct *work)
 			size = max_mw_size;
 
 		spad = MW0_SZ_HIGH + (i * 2);
-		ntb_peer_spad_write(ndev, spad, upper_32_bits(size));
+		ntb_peer_spad_write(ndev, PIDX, spad, upper_32_bits(size));
 
 		spad = MW0_SZ_LOW + (i * 2);
-		ntb_peer_spad_write(ndev, spad, lower_32_bits(size));
+		ntb_peer_spad_write(ndev, PIDX, spad, lower_32_bits(size));
 	}
 
-	ntb_peer_spad_write(ndev, NUM_MWS, nt->mw_count);
+	ntb_peer_spad_write(ndev, PIDX, NUM_MWS, nt->mw_count);
 
-	ntb_peer_spad_write(ndev, NUM_QPS, nt->qp_count);
+	ntb_peer_spad_write(ndev, PIDX, NUM_QPS, nt->qp_count);
 
-	ntb_peer_spad_write(ndev, VERSION, NTB_TRANSPORT_VERSION);
+	ntb_peer_spad_write(ndev, PIDX, VERSION, NTB_TRANSPORT_VERSION);
 
 	/* Query the remote side for its info */
 	val = ntb_spad_read(ndev, VERSION);
@@ -944,7 +948,7 @@ static void ntb_qp_link_work(struct work_struct *work)
 
 	val = ntb_spad_read(nt->ndev, QP_LINKS);
 
-	ntb_peer_spad_write(nt->ndev, QP_LINKS, val | BIT(qp->qp_num));
+	ntb_peer_spad_write(nt->ndev, PIDX, QP_LINKS, val | BIT(qp->qp_num));
 
 	/* query remote spad for qp ready bits */
 	dev_dbg_ratelimited(&pdev->dev, "Remote QP link status = %x\n", val);
@@ -1055,7 +1059,12 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 	int node;
 	int rc, i;
 
-	mw_count = ntb_mw_count(ndev);
+	mw_count = ntb_mw_count(ndev, PIDX);
+
+	if (!ndev->ops->mw_set_trans) {
+		dev_err(&ndev->dev, "Inbound MW based NTB API is required\n");
+		return -EINVAL;
+	}
 
 	if (ntb_db_is_unsafe(ndev))
 		dev_dbg(&ndev->dev,
@@ -1064,6 +1073,9 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 		dev_dbg(&ndev->dev,
 			"scratchpad is unsafe, proceed anyway...\n");
 
+	if (ntb_peer_port_count(ndev) != NTB_DEF_PEER_CNT)
+		dev_warn(&ndev->dev, "Multi-port NTB devices unsupported\n");
+
 	node = dev_to_node(&ndev->dev);
 
 	nt = kzalloc_node(sizeof(*nt), GFP_KERNEL, node);
@@ -1094,8 +1106,13 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 	for (i = 0; i < mw_count; i++) {
 		mw = &nt->mw_vec[i];
 
-		rc = ntb_mw_get_range(ndev, i, &mw->phys_addr, &mw->phys_size,
-				      &mw->xlat_align, &mw->xlat_align_size);
+		rc = ntb_mw_get_align(ndev, PIDX, i, &mw->xlat_align,
+				      &mw->xlat_align_size, NULL);
+		if (rc)
+			goto err1;
+
+		rc = ntb_peer_mw_get_addr(ndev, i, &mw->phys_addr,
+					  &mw->phys_size);
 		if (rc)
 			goto err1;
 
@@ -2091,8 +2108,7 @@ void ntb_transport_link_down(struct ntb_transport_qp *qp)
 
 	val = ntb_spad_read(qp->ndev, QP_LINKS);
 
-	ntb_peer_spad_write(qp->ndev, QP_LINKS,
-			    val & ~BIT(qp->qp_num));
+	ntb_peer_spad_write(qp->ndev, PIDX, QP_LINKS, val & ~BIT(qp->qp_num));
 
 	if (qp->link_is_up)
 		ntb_send_link_down(qp);

diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 5cab283..759f772 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c

@@ -76,6 +76,7 @@
 #define DMA_RETRIES		20
 #define SZ_4G			(1ULL << 32)
 #define MAX_SEG_ORDER		20 /* no larger than 1M for kmalloc buffer */
+#define PIDX			NTB_DEF_PEER_IDX
 
 MODULE_LICENSE(DRIVER_LICENSE);
 MODULE_VERSION(DRIVER_VERSION);
@@ -100,6 +101,10 @@ static bool use_dma; /* default to 0 */
 module_param(use_dma, bool, 0644);
 MODULE_PARM_DESC(use_dma, "Using DMA engine to measure performance");
 
+static bool on_node = true; /* default to 1 */
+module_param(on_node, bool, 0644);
+MODULE_PARM_DESC(on_node, "Run threads only on NTB device node (default: true)");
+
 struct perf_mw {
 	phys_addr_t	phys_addr;
 	resource_size_t	phys_size;
@@ -135,9 +140,6 @@ struct perf_ctx {
 	bool			link_is_up;
 	struct delayed_work	link_work;
 	wait_queue_head_t	link_wq;
-	struct dentry		*debugfs_node_dir;
-	struct dentry		*debugfs_run;
-	struct dentry		*debugfs_threads;
 	u8			perf_threads;
 	/* mutex ensures only one set of threads run at once */
 	struct mutex		run_mutex;
@@ -344,6 +346,10 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
 
 static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
 {
+	/* Is the channel required to be on the same node as the device? */
+	if (!on_node)
+		return true;
+
 	return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
 }
 
@@ -361,7 +367,7 @@ static int ntb_perf_thread(void *data)
 
 	pr_debug("kthread %s starting...\n", current->comm);
 
-	node = dev_to_node(&pdev->dev);
+	node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE;
 
 	if (use_dma && !pctx->dma_chan) {
 		dma_cap_mask_t dma_mask;
@@ -454,7 +460,7 @@ static void perf_free_mw(struct perf_ctx *perf)
 	if (!mw->virt_addr)
 		return;
 
-	ntb_mw_clear_trans(perf->ntb, 0);
+	ntb_mw_clear_trans(perf->ntb, PIDX, 0);
 	dma_free_coherent(&pdev->dev, mw->buf_size,
 			  mw->virt_addr, mw->dma_addr);
 	mw->xlat_size = 0;
@@ -490,7 +496,7 @@ static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
 		mw->buf_size = 0;
 	}
 
-	rc = ntb_mw_set_trans(perf->ntb, 0, mw->dma_addr, mw->xlat_size);
+	rc = ntb_mw_set_trans(perf->ntb, PIDX, 0, mw->dma_addr, mw->xlat_size);
 	if (rc) {
 		dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n");
 		perf_free_mw(perf);
@@ -517,9 +523,9 @@ static void perf_link_work(struct work_struct *work)
 	if (max_mw_size && size > max_mw_size)
 		size = max_mw_size;
 
-	ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size));
-	ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size));
-	ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);
+	ntb_peer_spad_write(ndev, PIDX, MW_SZ_HIGH, upper_32_bits(size));
+	ntb_peer_spad_write(ndev, PIDX, MW_SZ_LOW, lower_32_bits(size));
+	ntb_peer_spad_write(ndev, PIDX, VERSION, PERF_VERSION);
 
 	/* now read what peer wrote */
 	val = ntb_spad_read(ndev, VERSION);
@@ -561,8 +567,12 @@ static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
 
 	mw = &perf->mw;
 
-	rc = ntb_mw_get_range(ntb, 0, &mw->phys_addr, &mw->phys_size,
-			      &mw->xlat_align, &mw->xlat_align_size);
+	rc = ntb_mw_get_align(ntb, PIDX, 0, &mw->xlat_align,
+			      &mw->xlat_align_size, NULL);
+	if (rc)
+		return rc;
+
+	rc = ntb_peer_mw_get_addr(ntb, 0, &mw->phys_addr, &mw->phys_size);
 	if (rc)
 		return rc;
 
@@ -677,7 +687,8 @@ static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
 		pr_info("Fix run_order to %u\n", run_order);
 	}
 
-	node = dev_to_node(&perf->ntb->pdev->dev);
+	node = on_node ? dev_to_node(&perf->ntb->pdev->dev)
+		       : NUMA_NO_NODE;
 	atomic_set(&perf->tdone, 0);
 
 	/* launch kernel thread */
@@ -723,34 +734,71 @@ static const struct file_operations ntb_perf_debugfs_run = {
 static int perf_debugfs_setup(struct perf_ctx *perf)
 {
 	struct pci_dev *pdev = perf->ntb->pdev;
+	struct dentry *debugfs_node_dir;
+	struct dentry *debugfs_run;
+	struct dentry *debugfs_threads;
+	struct dentry *debugfs_seg_order;
+	struct dentry *debugfs_run_order;
+	struct dentry *debugfs_use_dma;
+	struct dentry *debugfs_on_node;
 
 	if (!debugfs_initialized())
 		return -ENODEV;
 
+	/* Assumption: only one NTB device in the system */
 	if (!perf_debugfs_dir) {
 		perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
 		if (!perf_debugfs_dir)
 			return -ENODEV;
 	}
 
-	perf->debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
-						    perf_debugfs_dir);
-	if (!perf->debugfs_node_dir)
-		return -ENODEV;
+	debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
+					      perf_debugfs_dir);
+	if (!debugfs_node_dir)
+		goto err;
 
-	perf->debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
-						perf->debugfs_node_dir, perf,
-						&ntb_perf_debugfs_run);
-	if (!perf->debugfs_run)
-		return -ENODEV;
+	debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
+					  debugfs_node_dir, perf,
+					  &ntb_perf_debugfs_run);
+	if (!debugfs_run)
+		goto err;
 
-	perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
-						  perf->debugfs_node_dir,
-						  &perf->perf_threads);
-	if (!perf->debugfs_threads)
-		return -ENODEV;
+	debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
+					    debugfs_node_dir,
+					    &perf->perf_threads);
+	if (!debugfs_threads)
+		goto err;
+
+	debugfs_seg_order = debugfs_create_u32("seg_order", 0600,
+					       debugfs_node_dir,
+					       &seg_order);
+	if (!debugfs_seg_order)
+		goto err;
+
+	debugfs_run_order = debugfs_create_u32("run_order", 0600,
+					       debugfs_node_dir,
+					       &run_order);
+	if (!debugfs_run_order)
+		goto err;
+
+	debugfs_use_dma = debugfs_create_bool("use_dma", 0600,
+					       debugfs_node_dir,
+					       &use_dma);
+	if (!debugfs_use_dma)
+		goto err;
+
+	debugfs_on_node = debugfs_create_bool("on_node", 0600,
+					      debugfs_node_dir,
+					      &on_node);
+	if (!debugfs_on_node)
+		goto err;
 
 	return 0;
+
+err:
+	debugfs_remove_recursive(perf_debugfs_dir);
+	perf_debugfs_dir = NULL;
+	return -ENODEV;
 }
 
 static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
@@ -766,8 +814,15 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
 		return -EIO;
 	}
 
-	node = dev_to_node(&pdev->dev);
+	if (!ntb->ops->mw_set_trans) {
+		dev_err(&ntb->dev, "Need inbound MW based NTB API\n");
+		return -EINVAL;
+	}
 
+	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
+		dev_warn(&ntb->dev, "Multi-port NTB devices unsupported\n");
+
+	node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE;
 	perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
 	if (!perf) {
 		rc = -ENOMEM;

diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c
index 4358611..938a18bc 100644
--- a/drivers/ntb/test/ntb_pingpong.c
+++ b/drivers/ntb/test/ntb_pingpong.c

@@ -90,6 +90,9 @@ static unsigned long db_init = 0x7;
 module_param(db_init, ulong, 0644);
 MODULE_PARM_DESC(db_init, "Initial doorbell bits to ring on the peer");
 
+/* Only two-port NTB devices are supported */
+#define PIDX		NTB_DEF_PEER_IDX
+
 struct pp_ctx {
 	struct ntb_dev			*ntb;
 	u64				db_bits;
@@ -135,7 +138,7 @@ static void pp_ping(unsigned long ctx)
 			"Ping bits %#llx read %#x write %#x\n",
 			db_bits, spad_rd, spad_wr);
 
-		ntb_peer_spad_write(pp->ntb, 0, spad_wr);
+		ntb_peer_spad_write(pp->ntb, PIDX, 0, spad_wr);
 		ntb_peer_db_set(pp->ntb, db_bits);
 		ntb_db_clear_mask(pp->ntb, db_mask);
 
@@ -222,6 +225,12 @@ static int pp_probe(struct ntb_client *client,
 		}
 	}
 
+	if (ntb_spad_count(ntb) < 1) {
+		dev_dbg(&ntb->dev, "no enough scratchpads\n");
+		rc = -EINVAL;
+		goto err_pp;
+	}
+
 	if (ntb_spad_is_unsafe(ntb)) {
 		dev_dbg(&ntb->dev, "scratchpad is unsafe\n");
 		if (!unsafe) {
@@ -230,6 +239,9 @@ static int pp_probe(struct ntb_client *client,
 		}
 	}
 
+	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
+		dev_warn(&ntb->dev, "multi-port NTB is unsupported\n");
+
 	pp = kmalloc(sizeof(*pp), GFP_KERNEL);
 	if (!pp) {
 		rc = -ENOMEM;

diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c
index 61bf2ef..f002bf4 100644
--- a/drivers/ntb/test/ntb_tool.c
+++ b/drivers/ntb/test/ntb_tool.c

@@ -119,7 +119,10 @@ MODULE_VERSION(DRIVER_VERSION);
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
 
-#define MAX_MWS 16
+/* It is rare to have hardware with more than six MWs */
+#define MAX_MWS	6
+/* Only two-port devices are supported */
+#define PIDX	NTB_DEF_PEER_IDX
 
 static struct dentry *tool_dbgfs;
 
@@ -459,13 +462,22 @@ static TOOL_FOPS_RDWR(tool_spad_fops,
 		      tool_spad_read,
 		      tool_spad_write);
 
+static u32 ntb_tool_peer_spad_read(struct ntb_dev *ntb, int sidx)
+{
+	return ntb_peer_spad_read(ntb, PIDX, sidx);
+}
+
 static ssize_t tool_peer_spad_read(struct file *filep, char __user *ubuf,
 				   size_t size, loff_t *offp)
 {
 	struct tool_ctx *tc = filep->private_data;
 
-	return tool_spadfn_read(tc, ubuf, size, offp,
-				tc->ntb->ops->peer_spad_read);
+	return tool_spadfn_read(tc, ubuf, size, offp, ntb_tool_peer_spad_read);
+}
+
+static int ntb_tool_peer_spad_write(struct ntb_dev *ntb, int sidx, u32 val)
+{
+	return ntb_peer_spad_write(ntb, PIDX, sidx, val);
 }
 
 static ssize_t tool_peer_spad_write(struct file *filep, const char __user *ubuf,
@@ -474,7 +486,7 @@ static ssize_t tool_peer_spad_write(struct file *filep, const char __user *ubuf,
 	struct tool_ctx *tc = filep->private_data;
 
 	return tool_spadfn_write(tc, ubuf, size, offp,
-				 tc->ntb->ops->peer_spad_write);
+				 ntb_tool_peer_spad_write);
 }
 
 static TOOL_FOPS_RDWR(tool_peer_spad_fops,
@@ -668,28 +680,27 @@ static int tool_setup_mw(struct tool_ctx *tc, int idx, size_t req_size)
 {
 	int rc;
 	struct tool_mw *mw = &tc->mws[idx];
-	phys_addr_t base;
-	resource_size_t size, align, align_size;
+	resource_size_t size, align_addr, align_size;
 	char buf[16];
 
 	if (mw->peer)
 		return 0;
 
-	rc = ntb_mw_get_range(tc->ntb, idx, &base, &size, &align,
-			      &align_size);
+	rc = ntb_mw_get_align(tc->ntb, PIDX, idx, &align_addr,
+				&align_size, &size);
 	if (rc)
 		return rc;
 
 	mw->size = min_t(resource_size_t, req_size, size);
-	mw->size = round_up(mw->size, align);
+	mw->size = round_up(mw->size, align_addr);
 	mw->size = round_up(mw->size, align_size);
 	mw->peer = dma_alloc_coherent(&tc->ntb->pdev->dev, mw->size,
 				      &mw->peer_dma, GFP_KERNEL);
 
-	if (!mw->peer)
+	if (!mw->peer || !IS_ALIGNED(mw->peer_dma, align_addr))
 		return -ENOMEM;
 
-	rc = ntb_mw_set_trans(tc->ntb, idx, mw->peer_dma, mw->size);
+	rc = ntb_mw_set_trans(tc->ntb, PIDX, idx, mw->peer_dma, mw->size);
 	if (rc)
 		goto err_free_dma;
 
@@ -716,7 +727,7 @@ static void tool_free_mw(struct tool_ctx *tc, int idx)
 	struct tool_mw *mw = &tc->mws[idx];
 
 	if (mw->peer) {
-		ntb_mw_clear_trans(tc->ntb, idx);
+		ntb_mw_clear_trans(tc->ntb, PIDX, idx);
 		dma_free_coherent(&tc->ntb->pdev->dev, mw->size,
 				  mw->peer,
 				  mw->peer_dma);
@@ -742,8 +753,9 @@ static ssize_t tool_peer_mw_trans_read(struct file *filep,
 
 	phys_addr_t base;
 	resource_size_t mw_size;
-	resource_size_t align;
+	resource_size_t align_addr;
 	resource_size_t align_size;
+	resource_size_t max_size;
 
 	buf_size = min_t(size_t, size, 512);
 
@@ -751,8 +763,9 @@ static ssize_t tool_peer_mw_trans_read(struct file *filep,
 	if (!buf)
 		return -ENOMEM;
 
-	ntb_mw_get_range(mw->tc->ntb, mw->idx,
-			 &base, &mw_size, &align, &align_size);
+	ntb_mw_get_align(mw->tc->ntb, PIDX, mw->idx,
+			 &align_addr, &align_size, &max_size);
+	ntb_peer_mw_get_addr(mw->tc->ntb, mw->idx, &base, &mw_size);
 
 	off += scnprintf(buf + off, buf_size - off,
 			 "Peer MW %d Information:\n", mw->idx);
@@ -767,13 +780,17 @@ static ssize_t tool_peer_mw_trans_read(struct file *filep,
 
 	off += scnprintf(buf + off, buf_size - off,
 			 "Alignment             \t%lld\n",
-			 (unsigned long long)align);
+			 (unsigned long long)align_addr);
 
 	off += scnprintf(buf + off, buf_size - off,
 			 "Size Alignment        \t%lld\n",
 			 (unsigned long long)align_size);
 
 	off += scnprintf(buf + off, buf_size - off,
+			 "Size Max              \t%lld\n",
+			 (unsigned long long)max_size);
+
+	off += scnprintf(buf + off, buf_size - off,
 			 "Ready                 \t%c\n",
 			 (mw->peer) ? 'Y' : 'N');
 
@@ -827,8 +844,7 @@ static int tool_init_mw(struct tool_ctx *tc, int idx)
 	phys_addr_t base;
 	int rc;
 
-	rc = ntb_mw_get_range(tc->ntb, idx, &base, &mw->win_size,
-			      NULL, NULL);
+	rc = ntb_peer_mw_get_addr(tc->ntb, idx, &base, &mw->win_size);
 	if (rc)
 		return rc;
 
@@ -913,12 +929,27 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
 	int rc;
 	int i;
 
+	if (!ntb->ops->mw_set_trans) {
+		dev_dbg(&ntb->dev, "need inbound MW based NTB API\n");
+		rc = -EINVAL;
+		goto err_tc;
+	}
+
+	if (ntb_spad_count(ntb) < 1) {
+		dev_dbg(&ntb->dev, "no enough scratchpads\n");
+		rc = -EINVAL;
+		goto err_tc;
+	}
+
 	if (ntb_db_is_unsafe(ntb))
 		dev_dbg(&ntb->dev, "doorbell is unsafe\n");
 
 	if (ntb_spad_is_unsafe(ntb))
 		dev_dbg(&ntb->dev, "scratchpad is unsafe\n");
 
+	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
+		dev_warn(&ntb->dev, "multi-port NTB is unsupported\n");
+
 	tc = kzalloc(sizeof(*tc), GFP_KERNEL);
 	if (!tc) {
 		rc = -ENOMEM;
@@ -928,7 +959,7 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
 	tc->ntb = ntb;
 	init_waitqueue_head(&tc->link_wq);
 
-	tc->mw_count = min(ntb_mw_count(tc->ntb), MAX_MWS);
+	tc->mw_count = min(ntb_mw_count(tc->ntb, PIDX), MAX_MWS);
 	for (i = 0; i < tc->mw_count; i++) {
 		rc = tool_init_mw(tc, i);
 		if (rc)

diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index f12d23c..345acca 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c

@@ -106,7 +106,8 @@ static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk,
 
 		len -= cur_len;
 		dev_offset += cur_len;
-		bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
+		if (!bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len))
+			return -EIO;
 	}
 
 	return err;
@@ -179,16 +180,8 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 	int err = 0, rw;
 	bool do_acct;
 
-	/*
-	 * bio_integrity_enabled also checks if the bio already has an
-	 * integrity payload attached. If it does, we *don't* do a
-	 * bio_integrity_prep here - the payload has been generated by
-	 * another kernel subsystem, and we just pass it through.
-	 */
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_status = BLK_STS_IOERR;
-		goto out;
-	}
+	if (!bio_integrity_prep(bio))
+		return BLK_QC_T_NONE;
 
 	bip = bio_integrity(bio);
 	nsblk = q->queuedata;
@@ -212,7 +205,6 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
 	if (do_acct)
 		nd_iostat_end(bio, start);
 
- out:
 	bio_endio(bio);
 	return BLK_QC_T_NONE;
 }

diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 64216de..14323fa 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c

@@ -985,7 +985,8 @@ static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
 
 		len -= cur_len;
 		meta_nsoff += cur_len;
-		bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
+		if (!bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len))
+			return -EIO;
 	}
 
 	return ret;
@@ -1203,16 +1204,8 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 	int err = 0;
 	bool do_acct;
 
-	/*
-	 * bio_integrity_enabled also checks if the bio already has an
-	 * integrity payload attached. If it does, we *don't* do a
-	 * bio_integrity_prep here - the payload has been generated by
-	 * another kernel subsystem, and we just pass it through.
-	 */
-	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
-		bio->bi_status = BLK_STS_IOERR;
-		goto out;
-	}
+	if (!bio_integrity_prep(bio))
+		return BLK_QC_T_NONE;
 
 	do_acct = nd_iostat_start(bio, &start);
 	bio_for_each_segment(bvec, bio, iter) {
@@ -1239,7 +1232,6 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 	if (do_acct)
 		nd_iostat_end(bio, start);
 
-out:
 	bio_endio(bio);
 	return BLK_QC_T_NONE;
 }

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d70df1d..cb96f4a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c

@@ -131,7 +131,7 @@ void nvme_complete_rq(struct request *req)
 {
 	if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
 		nvme_req(req)->retries++;
-		blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
+		blk_mq_requeue_request(req, true);
 		return;
 	}
 
@@ -2591,12 +2591,29 @@ static void nvme_release_instance(struct nvme_ctrl *ctrl)
 	spin_unlock(&dev_list_lock);
 }
 
-void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
+	nvme_stop_keep_alive(ctrl);
 	flush_work(&ctrl->async_event_work);
 	flush_work(&ctrl->scan_work);
-	nvme_remove_namespaces(ctrl);
+}
+EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
 
+void nvme_start_ctrl(struct nvme_ctrl *ctrl)
+{
+	if (ctrl->kato)
+		nvme_start_keep_alive(ctrl);
+
+	if (ctrl->queue_count > 1) {
+		nvme_queue_scan(ctrl);
+		nvme_queue_async_events(ctrl);
+		nvme_start_queues(ctrl);
+	}
+}
+EXPORT_SYMBOL_GPL(nvme_start_ctrl);
+
+void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+{
 	device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
 
 	spin_lock(&dev_list_lock);
@@ -2694,9 +2711,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 	/* Forcibly unquiesce queues to avoid blocking dispatch */
 	blk_mq_unquiesce_queue(ctrl->admin_q);
 
-	/* Forcibly start all queues to avoid having stuck requests */
-	blk_mq_start_hw_queues(ctrl->admin_q);
-
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		/*
 		 * Revalidating a dead namespace sets capacity to 0. This will
@@ -2709,16 +2723,6 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 
 		/* Forcibly unquiesce queues to avoid blocking dispatch */
 		blk_mq_unquiesce_queue(ns->queue);
-
-		/*
-		 * Forcibly start all queues to avoid having stuck requests.
-		 * Note that we must ensure the queues are not stopped
-		 * when the final removal happens.
-		 */
-		blk_mq_start_hw_queues(ns->queue);
-
-		/* draining requests in requeue list */
-		blk_mq_kick_requeue_list(ns->queue);
 	}
 	mutex_unlock(&ctrl->namespaces_mutex);
 }
@@ -2787,10 +2791,8 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 	struct nvme_ns *ns;
 
 	mutex_lock(&ctrl->namespaces_mutex);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
+	list_for_each_entry(ns, &ctrl->namespaces, list)
 		blk_mq_unquiesce_queue(ns->queue);
-		blk_mq_kick_requeue_list(ns->queue);
-	}
 	mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index ed87214..d666ada 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c

@@ -148,13 +148,10 @@ struct nvme_fc_ctrl {
 	struct device		*dev;
 	struct nvme_fc_lport	*lport;
 	struct nvme_fc_rport	*rport;
-	u32			queue_count;
 	u32			cnum;
 
 	u64			association_id;
 
-	u64			cap;
-
 	struct list_head	ctrl_list;	/* rport->ctrl_list */
 
 	struct blk_mq_tag_set	admin_tag_set;
@@ -1614,7 +1611,7 @@ nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl)
 {
 	int i;
 
-	for (i = 1; i < ctrl->queue_count; i++)
+	for (i = 1; i < ctrl->ctrl.queue_count; i++)
 		nvme_fc_free_queue(&ctrl->queues[i]);
 }
 
@@ -1635,10 +1632,10 @@ __nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl,
 static void
 nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl)
 {
-	struct nvme_fc_queue *queue = &ctrl->queues[ctrl->queue_count - 1];
+	struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1];
 	int i;
 
-	for (i = ctrl->queue_count - 1; i >= 1; i--, queue--)
+	for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--)
 		__nvme_fc_delete_hw_queue(ctrl, queue, i);
 }
 
@@ -1648,7 +1645,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
 	struct nvme_fc_queue *queue = &ctrl->queues[1];
 	int i, ret;
 
-	for (i = 1; i < ctrl->queue_count; i++, queue++) {
+	for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) {
 		ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize);
 		if (ret)
 			goto delete_queues;
@@ -1667,7 +1664,7 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
 {
 	int i, ret = 0;
 
-	for (i = 1; i < ctrl->queue_count; i++) {
+	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
 		ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize,
 					(qsize / 5));
 		if (ret)
@@ -1685,7 +1682,7 @@ nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl)
 {
 	int i;
 
-	for (i = 1; i < ctrl->queue_count; i++)
+	for (i = 1; i < ctrl->ctrl.queue_count; i++)
 		nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize);
 }
 
@@ -1706,6 +1703,7 @@ nvme_fc_ctrl_free(struct kref *ref)
 	list_del(&ctrl->ctrl_list);
 	spin_unlock_irqrestore(&ctrl->rport->lock, flags);
 
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	blk_cleanup_queue(ctrl->ctrl.admin_q);
 	blk_mq_free_tag_set(&ctrl->admin_tag_set);
 
@@ -1969,10 +1967,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 		if (ret != -EBUSY)
 			return BLK_STS_IOERR;
 
-		if (op->rq) {
-			blk_mq_stop_hw_queues(op->rq->q);
-			blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
-		}
+		if (op->rq)
+			blk_mq_delay_run_hw_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
+
 		return BLK_STS_RESOURCE;
 	}
 
@@ -2178,17 +2175,20 @@ static int
 nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 {
 	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+	unsigned int nr_io_queues;
 	int ret;
 
-	ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
+	nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
+				ctrl->lport->ops->max_hw_queues);
+	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
 	if (ret) {
 		dev_info(ctrl->ctrl.device,
 			"set_queue_count failed: %d\n", ret);
 		return ret;
 	}
 
-	ctrl->queue_count = opts->nr_io_queues + 1;
-	if (!opts->nr_io_queues)
+	ctrl->ctrl.queue_count = nr_io_queues + 1;
+	if (!nr_io_queues)
 		return 0;
 
 	nvme_fc_init_io_queues(ctrl);
@@ -2204,7 +2204,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 						sizeof(struct scatterlist)) +
 					ctrl->lport->ops->fcprqst_priv_sz;
 	ctrl->tag_set.driver_data = ctrl;
-	ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+	ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
 	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
 
 	ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
@@ -2232,7 +2232,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 out_delete_hw_queues:
 	nvme_fc_delete_hw_io_queues(ctrl);
 out_cleanup_blk_queue:
-	nvme_stop_keep_alive(&ctrl->ctrl);
 	blk_cleanup_queue(ctrl->ctrl.connect_q);
 out_free_tag_set:
 	blk_mq_free_tag_set(&ctrl->tag_set);
@@ -2248,17 +2247,21 @@ static int
 nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 {
 	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+	unsigned int nr_io_queues;
 	int ret;
 
-	ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
+	nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
+				ctrl->lport->ops->max_hw_queues);
+	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
 	if (ret) {
 		dev_info(ctrl->ctrl.device,
 			"set_queue_count failed: %d\n", ret);
 		return ret;
 	}
 
+	ctrl->ctrl.queue_count = nr_io_queues + 1;
 	/* check for io queues existing */
-	if (ctrl->queue_count == 1)
+	if (ctrl->ctrl.queue_count == 1)
 		return 0;
 
 	nvme_fc_init_io_queues(ctrl);
@@ -2275,6 +2278,8 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 	if (ret)
 		goto out_delete_hw_queues;
 
+	blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
+
 	return 0;
 
 out_delete_hw_queues:
@@ -2316,7 +2321,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 		goto out_delete_hw_queue;
 
 	if (ctrl->ctrl.state != NVME_CTRL_NEW)
-		blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+		blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 
 	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
 	if (ret)
@@ -2329,7 +2334,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	 * prior connection values
 	 */
 
-	ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+	ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
 	if (ret) {
 		dev_err(ctrl->ctrl.device,
 			"prop_get NVME_REG_CAP failed\n");
@@ -2337,9 +2342,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize);
 
-	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 	if (ret)
 		goto out_disconnect_admin_queue;
 
@@ -2360,8 +2365,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 		goto out_disconnect_admin_queue;
 	}
 
-	nvme_start_keep_alive(&ctrl->ctrl);
-
 	/* FC-NVME supports normal SGL Data Block Descriptors */
 
 	if (opts->queue_size > ctrl->ctrl.maxcmd) {
@@ -2381,7 +2384,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	 * Create the io queues
 	 */
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		if (ctrl->ctrl.state == NVME_CTRL_NEW)
 			ret = nvme_fc_create_io_queues(ctrl);
 		else
@@ -2395,17 +2398,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 
 	ctrl->ctrl.nr_reconnects = 0;
 
-	if (ctrl->queue_count > 1) {
-		nvme_start_queues(&ctrl->ctrl);
-		nvme_queue_scan(&ctrl->ctrl);
-		nvme_queue_async_events(&ctrl->ctrl);
-	}
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	return 0;	/* Success */
 
 out_term_aen_ops:
 	nvme_fc_term_aen_ops(ctrl);
-	nvme_stop_keep_alive(&ctrl->ctrl);
 out_disconnect_admin_queue:
 	/* send a Disconnect(association) LS to fc-nvme target */
 	nvme_fc_xmt_disconnect_assoc(ctrl);
@@ -2428,8 +2426,6 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 {
 	unsigned long flags;
 
-	nvme_stop_keep_alive(&ctrl->ctrl);
-
 	spin_lock_irqsave(&ctrl->lock, flags);
 	ctrl->flags |= FCCTRL_TERMIO;
 	ctrl->iocnt = 0;
@@ -2447,7 +2443,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 	 * io requests back to the block layer as part of normal completions
 	 * (but with error status).
 	 */
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 				nvme_fc_terminate_exchange, &ctrl->ctrl);
@@ -2470,7 +2466,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 	 * use blk_mq_tagset_busy_itr() and the transport routine to
 	 * terminate the exchanges.
 	 */
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_fc_terminate_exchange, &ctrl->ctrl);
 
@@ -2511,7 +2507,8 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
 
 	cancel_work_sync(&ctrl->ctrl.reset_work);
 	cancel_delayed_work_sync(&ctrl->connect_work);
-
+	nvme_stop_ctrl(&ctrl->ctrl);
+	nvme_remove_namespaces(&ctrl->ctrl);
 	/*
 	 * kill the association on the link side.  this will block
 	 * waiting for io to terminate
@@ -2606,6 +2603,7 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
 		container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
 	int ret;
 
+	nvme_stop_ctrl(&ctrl->ctrl);
 	/* will block will waiting for io to terminate */
 	nvme_fc_delete_association(ctrl);
 
@@ -2702,18 +2700,17 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	spin_lock_init(&ctrl->lock);
 
 	/* io queue count */
-	ctrl->queue_count = min_t(unsigned int,
+	ctrl->ctrl.queue_count = min_t(unsigned int,
 				opts->nr_io_queues,
 				lport->ops->max_hw_queues);
-	opts->nr_io_queues = ctrl->queue_count;	/* so opts has valid value */
-	ctrl->queue_count++;	/* +1 for admin queue */
+	ctrl->ctrl.queue_count++;	/* +1 for admin queue */
 
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
 	ctrl->ctrl.kato = opts->kato;
 
 	ret = -ENOMEM;
-	ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue),
-				GFP_KERNEL);
+	ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
+				sizeof(struct nvme_fc_queue), GFP_KERNEL);
 	if (!ctrl->queues)
 		goto out_free_ida;
 

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index d70ff0f..8f2a168 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h

@@ -142,7 +142,9 @@ struct nvme_ctrl {
 	u16 cntlid;
 
 	u32 ctrl_config;
+	u32 queue_count;
 
+	u64 cap;
 	u32 page_size;
 	u32 max_hw_sectors;
 	u16 oncs;
@@ -278,6 +280,8 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
 int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 		const struct nvme_ctrl_ops *ops, unsigned long quirks);
 void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
+void nvme_start_ctrl(struct nvme_ctrl *ctrl);
+void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
 void nvme_put_ctrl(struct nvme_ctrl *ctrl);
 int nvme_init_identify(struct nvme_ctrl *ctrl);
 

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b7a84c5..d10d2f2 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c

@@ -35,7 +35,6 @@
 
 #include "nvme.h"
 
-#define NVME_Q_DEPTH		1024
 #define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
 
@@ -57,6 +56,16 @@ module_param(max_host_mem_size_mb, uint, 0444);
 MODULE_PARM_DESC(max_host_mem_size_mb,
 	"Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
 
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops io_queue_depth_ops = {
+	.set = io_queue_depth_set,
+	.get = param_get_int,
+};
+
+static int io_queue_depth = 1024;
+module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
+MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
+
 struct nvme_dev;
 struct nvme_queue;
 
@@ -74,7 +83,6 @@ struct nvme_dev {
 	struct device *dev;
 	struct dma_pool *prp_page_pool;
 	struct dma_pool *prp_small_pool;
-	unsigned queue_count;
 	unsigned online_queues;
 	unsigned max_qid;
 	int q_depth;
@@ -105,6 +113,17 @@ struct nvme_dev {
 	void **host_mem_desc_bufs;
 };
 
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
+{
+	int n = 0, ret;
+
+	ret = kstrtoint(val, 10, &n);
+	if (ret != 0 || n < 2)
+		return -EINVAL;
+
+	return param_set_int(val, kp);
+}
+
 static inline unsigned int sq_idx(unsigned int qid, u32 stride)
 {
 	return qid * 2 * stride;
@@ -1099,9 +1118,9 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
 {
 	int i;
 
-	for (i = dev->queue_count - 1; i >= lowest; i--) {
+	for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) {
 		struct nvme_queue *nvmeq = dev->queues[i];
-		dev->queue_count--;
+		dev->ctrl.queue_count--;
 		dev->queues[i] = NULL;
 		nvme_free_queue(nvmeq);
 	}
@@ -1126,7 +1145,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 	spin_unlock_irq(&nvmeq->q_lock);
 
 	if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
-		blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q);
+		blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
 
 	pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq);
 
@@ -1145,8 +1164,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
 	if (shutdown)
 		nvme_shutdown_ctrl(&dev->ctrl);
 	else
-		nvme_disable_ctrl(&dev->ctrl, lo_hi_readq(
-						dev->bar + NVME_REG_CAP));
+		nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
 
 	spin_lock_irq(&nvmeq->q_lock);
 	nvme_process_cq(nvmeq);
@@ -1221,7 +1239,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	nvmeq->qid = qid;
 	nvmeq->cq_vector = -1;
 	dev->queues[qid] = nvmeq;
-	dev->queue_count++;
+	dev->ctrl.queue_count++;
 
 	return nvmeq;
 
@@ -1317,7 +1335,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev)
 		 * user requests may be waiting on a stopped queue. Start the
 		 * queue to flush these to completion.
 		 */
-		blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+		blk_mq_unquiesce_queue(dev->ctrl.admin_q);
 		blk_cleanup_queue(dev->ctrl.admin_q);
 		blk_mq_free_tag_set(&dev->admin_tagset);
 	}
@@ -1354,7 +1372,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 			return -ENODEV;
 		}
 	} else
-		blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+		blk_mq_unquiesce_queue(dev->ctrl.admin_q);
 
 	return 0;
 }
@@ -1385,11 +1403,10 @@ static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
 	return 0;
 }
 
-static int nvme_configure_admin_queue(struct nvme_dev *dev)
+static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
 	u32 aqa;
-	u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 	struct nvme_queue *nvmeq;
 
 	result = nvme_remap_bar(dev, db_bar_size(dev, 0));
@@ -1397,13 +1414,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 		return result;
 
 	dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
-						NVME_CAP_NSSRC(cap) : 0;
+				NVME_CAP_NSSRC(dev->ctrl.cap) : 0;
 
 	if (dev->subsystem &&
 	    (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
 		writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
 
-	result = nvme_disable_ctrl(&dev->ctrl, cap);
+	result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
 	if (result < 0)
 		return result;
 
@@ -1422,7 +1439,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
 	lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
 
-	result = nvme_enable_ctrl(&dev->ctrl, cap);
+	result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap);
 	if (result)
 		return result;
 
@@ -1441,7 +1458,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
 	unsigned i, max;
 	int ret = 0;
 
-	for (i = dev->queue_count; i <= dev->max_qid; i++) {
+	for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
 		/* vector == qid - 1, match nvme_create_queue */
 		if (!nvme_alloc_queue(dev, i, dev->q_depth,
 		     pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
@@ -1450,7 +1467,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
 		}
 	}
 
-	max = min(dev->max_qid, dev->queue_count - 1);
+	max = min(dev->max_qid, dev->ctrl.queue_count - 1);
 	for (i = dev->online_queues; i <= max; i++) {
 		ret = nvme_create_queue(dev->queues[i], i);
 		if (ret)
@@ -1585,9 +1602,10 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
 static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
 {
 	struct nvme_host_mem_buf_desc *descs;
-	u32 chunk_size, max_entries, i = 0;
+	u32 chunk_size, max_entries;
+	int i = 0;
 	void **bufs;
-	u64 size, tmp;
+	u64 size = 0, tmp;
 
 	/* start big and work our way down */
 	chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
@@ -1866,7 +1884,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 
 static int nvme_pci_enable(struct nvme_dev *dev)
 {
-	u64 cap;
 	int result = -ENOMEM;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
@@ -1893,10 +1910,11 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 	if (result < 0)
 		return result;
 
-	cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+	dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 
-	dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
-	dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
+	dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
+				io_queue_depth);
+	dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
 	dev->dbs = dev->bar + 4096;
 
 	/*
@@ -1908,6 +1926,12 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 		dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
 			"set queue depth=%u to work around controller resets\n",
 			dev->q_depth);
+	} else if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG &&
+		   (pdev->device == 0xa821 || pdev->device == 0xa822) &&
+		   NVME_CAP_MQES(dev->ctrl.cap) == 0) {
+		dev->q_depth = 64;
+		dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, "
+                        "set queue depth=%u\n", dev->q_depth);
 	}
 
 	/*
@@ -1996,7 +2020,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	nvme_stop_queues(&dev->ctrl);
 
 	queues = dev->online_queues - 1;
-	for (i = dev->queue_count - 1; i > 0; i--)
+	for (i = dev->ctrl.queue_count - 1; i > 0; i--)
 		nvme_suspend_queue(dev->queues[i]);
 
 	if (dead) {
@@ -2004,7 +2028,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 		 * probe, before the admin queue is configured. Thus,
 		 * queue_count can be 0 here.
 		 */
-		if (dev->queue_count)
+		if (dev->ctrl.queue_count)
 			nvme_suspend_queue(dev->queues[0]);
 	} else {
 		nvme_disable_io_queues(dev, queues);
@@ -2094,7 +2118,7 @@ static void nvme_reset_work(struct work_struct *work)
 	if (result)
 		goto out;
 
-	result = nvme_configure_admin_queue(dev);
+	result = nvme_pci_configure_admin_queue(dev);
 	if (result)
 		goto out;
 
@@ -2133,15 +2157,6 @@ static void nvme_reset_work(struct work_struct *work)
 		goto out;
 
 	/*
-	 * A controller that can not execute IO typically requires user
-	 * intervention to correct. For such degraded controllers, the driver
-	 * should not submit commands the user did not request, so skip
-	 * registering for asynchronous event notification on this condition.
-	 */
-	if (dev->online_queues > 1)
-		nvme_queue_async_events(&dev->ctrl);
-
-	/*
 	 * Keep the controller around but remove all namespaces if we don't have
 	 * any working I/O queue.
 	 */
@@ -2161,8 +2176,7 @@ static void nvme_reset_work(struct work_struct *work)
 		goto out;
 	}
 
-	if (dev->online_queues > 1)
-		nvme_queue_scan(&dev->ctrl);
+	nvme_start_ctrl(&dev->ctrl);
 	return;
 
  out:
@@ -2341,11 +2355,13 @@ static void nvme_remove(struct pci_dev *pdev)
 	}
 
 	flush_work(&dev->ctrl.reset_work);
-	nvme_uninit_ctrl(&dev->ctrl);
+	nvme_stop_ctrl(&dev->ctrl);
+	nvme_remove_namespaces(&dev->ctrl);
 	nvme_dev_disable(dev, true);
 	nvme_free_host_mem(dev);
 	nvme_dev_remove_admin(dev);
 	nvme_free_queues(dev, 0);
+	nvme_uninit_ctrl(&dev->ctrl);
 	nvme_release_prp_pools(dev);
 	nvme_dev_unmap(dev);
 	nvme_put_ctrl(&dev->ctrl);
@@ -2458,6 +2474,10 @@ static const struct pci_device_id nvme_id_table[] = {
 		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
 	{ PCI_DEVICE(0x1c5f, 0x0540),	/* Memblaze Pblaze4 adapter */
 		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+	{ PCI_DEVICE(0x144d, 0xa821),   /* Samsung PM1725 */
+		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+	{ PCI_DEVICE(0x144d, 0xa822),   /* Samsung PM1725a */
+		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
 	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 6d4119d..da04df1 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c

@@ -86,7 +86,7 @@ enum nvme_rdma_queue_flags {
 
 struct nvme_rdma_queue {
 	struct nvme_rdma_qe	*rsp_ring;
-	u8			sig_count;
+	atomic_t		sig_count;
 	int			queue_size;
 	size_t			cmnd_capsule_len;
 	struct nvme_rdma_ctrl	*ctrl;
@@ -103,7 +103,6 @@ struct nvme_rdma_queue {
 struct nvme_rdma_ctrl {
 	/* read only in the hot path */
 	struct nvme_rdma_queue	*queues;
-	u32			queue_count;
 
 	/* other member variables */
 	struct blk_mq_tag_set	tag_set;
@@ -119,7 +118,6 @@ struct nvme_rdma_ctrl {
 	struct blk_mq_tag_set	admin_tag_set;
 	struct nvme_rdma_device	*device;
 
-	u64			cap;
 	u32			max_fr_pages;
 
 	struct sockaddr_storage addr;
@@ -274,9 +272,6 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq)
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	int ret = 0;
 
-	if (!req->mr->need_inval)
-		goto out;
-
 	ib_dereg_mr(req->mr);
 
 	req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
@@ -349,7 +344,7 @@ static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 	struct nvme_rdma_ctrl *ctrl = data;
 	struct nvme_rdma_queue *queue = &ctrl->queues[hctx_idx + 1];
 
-	BUG_ON(hctx_idx >= ctrl->queue_count);
+	BUG_ON(hctx_idx >= ctrl->ctrl.queue_count);
 
 	hctx->driver_data = queue;
 	return 0;
@@ -525,6 +520,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
 		queue->cmnd_capsule_len = sizeof(struct nvme_command);
 
 	queue->queue_size = queue_size;
+	atomic_set(&queue->sig_count, 0);
 
 	queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
 			RDMA_PS_TCP, IB_QPT_RC);
@@ -587,7 +583,7 @@ static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
 {
 	int i;
 
-	for (i = 1; i < ctrl->queue_count; i++)
+	for (i = 1; i < ctrl->ctrl.queue_count; i++)
 		nvme_rdma_stop_and_free_queue(&ctrl->queues[i]);
 }
 
@@ -595,7 +591,7 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl)
 {
 	int i, ret = 0;
 
-	for (i = 1; i < ctrl->queue_count; i++) {
+	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
 		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
 		if (ret) {
 			dev_info(ctrl->ctrl.device,
@@ -623,14 +619,14 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
 	if (ret)
 		return ret;
 
-	ctrl->queue_count = nr_io_queues + 1;
-	if (ctrl->queue_count < 2)
+	ctrl->ctrl.queue_count = nr_io_queues + 1;
+	if (ctrl->ctrl.queue_count < 2)
 		return 0;
 
 	dev_info(ctrl->ctrl.device,
 		"creating %d I/O queues.\n", nr_io_queues);
 
-	for (i = 1; i < ctrl->queue_count; i++) {
+	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
 		ret = nvme_rdma_init_queue(ctrl, i,
 					   ctrl->ctrl.opts->queue_size);
 		if (ret) {
@@ -705,7 +701,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
 	++ctrl->ctrl.nr_reconnects;
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_rdma_free_io_queues(ctrl);
 
 		ret = blk_mq_reinit_tagset(&ctrl->tag_set);
@@ -729,13 +725,11 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 
 	set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
 
-	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 	if (ret)
 		goto requeue;
 
-	nvme_start_keep_alive(&ctrl->ctrl);
-
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		ret = nvme_rdma_init_io_queues(ctrl);
 		if (ret)
 			goto requeue;
@@ -743,16 +737,16 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 		ret = nvme_rdma_connect_io_queues(ctrl);
 		if (ret)
 			goto requeue;
+
+		blk_mq_update_nr_hw_queues(&ctrl->tag_set,
+				ctrl->ctrl.queue_count - 1);
 	}
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
 	ctrl->ctrl.nr_reconnects = 0;
 
-	if (ctrl->queue_count > 1) {
-		nvme_queue_scan(&ctrl->ctrl);
-		nvme_queue_async_events(&ctrl->ctrl);
-	}
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
 
@@ -770,17 +764,17 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 			struct nvme_rdma_ctrl, err_work);
 	int i;
 
-	nvme_stop_keep_alive(&ctrl->ctrl);
+	nvme_stop_ctrl(&ctrl->ctrl);
 
-	for (i = 0; i < ctrl->queue_count; i++)
+	for (i = 0; i < ctrl->ctrl.queue_count; i++)
 		clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
 
-	if (ctrl->queue_count > 1)
+	if (ctrl->ctrl.queue_count > 1)
 		nvme_stop_queues(&ctrl->ctrl);
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 
 	/* We must take care of fastfail/requeue all our inflight requests */
-	if (ctrl->queue_count > 1)
+	if (ctrl->ctrl.queue_count > 1)
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 					nvme_cancel_request, &ctrl->ctrl);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
@@ -790,7 +784,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	 * queues are not a live anymore, so restart the queues to fail fast
 	 * new IO
 	 */
-	blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_start_queues(&ctrl->ctrl);
 
 	nvme_rdma_reconnect_or_remove(ctrl);
@@ -1008,17 +1002,16 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 		nvme_rdma_wr_error(cq, wc, "SEND");
 }
 
-static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
+/*
+ * We want to signal completion at least every queue depth/2.  The limit is the
+ * largest power of two that is not above half of (queue size + 1), so the check
+ * can use a mask instead of a division.
+ */
+static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
 {
-	int sig_limit;
+	int limit = 1 << ilog2((queue->queue_size + 1) / 2);
 
-	/*
-	 * We signal completion every queue depth/2 and also handle the
-	 * degenerated case of a  device with queue_depth=1, where we
-	 * would need to signal every message.
-	 */
-	sig_limit = max(queue->queue_size / 2, 1);
-	return (++queue->sig_count % sig_limit) == 0;
+	return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0;
 }
 
 static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
@@ -1574,7 +1567,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 
 	set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
 
-	error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+	error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP,
+			&ctrl->ctrl.cap);
 	if (error) {
 		dev_err(ctrl->ctrl.device,
 			"prop_get NVME_REG_CAP failed\n");
@@ -1582,9 +1576,9 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
 
-	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 	if (error)
 		goto out_cleanup_queue;
 
@@ -1601,8 +1595,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 	if (error)
 		goto out_cleanup_queue;
 
-	nvme_start_keep_alive(&ctrl->ctrl);
-
 	return 0;
 
 out_cleanup_queue:
@@ -1620,11 +1612,10 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 
 static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
 {
-	nvme_stop_keep_alive(&ctrl->ctrl);
 	cancel_work_sync(&ctrl->err_work);
 	cancel_delayed_work_sync(&ctrl->reconnect_work);
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 					nvme_cancel_request, &ctrl->ctrl);
@@ -1634,18 +1625,21 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
 	if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags))
 		nvme_shutdown_ctrl(&ctrl->ctrl);
 
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_cancel_request, &ctrl->ctrl);
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_destroy_admin_queue(ctrl);
 }
 
 static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
 {
-	nvme_uninit_ctrl(&ctrl->ctrl);
+	nvme_stop_ctrl(&ctrl->ctrl);
+	nvme_remove_namespaces(&ctrl->ctrl);
 	if (shutdown)
 		nvme_rdma_shutdown_ctrl(ctrl);
 
+	nvme_uninit_ctrl(&ctrl->ctrl);
 	if (ctrl->ctrl.tagset) {
 		blk_cleanup_queue(ctrl->ctrl.connect_q);
 		blk_mq_free_tag_set(&ctrl->tag_set);
@@ -1707,6 +1701,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 	int ret;
 	bool changed;
 
+	nvme_stop_ctrl(&ctrl->ctrl);
 	nvme_rdma_shutdown_ctrl(ctrl);
 
 	ret = nvme_rdma_configure_admin_queue(ctrl);
@@ -1716,7 +1711,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 		goto del_dead_ctrl;
 	}
 
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		ret = blk_mq_reinit_tagset(&ctrl->tag_set);
 		if (ret)
 			goto del_dead_ctrl;
@@ -1728,16 +1723,15 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 		ret = nvme_rdma_connect_io_queues(ctrl);
 		if (ret)
 			goto del_dead_ctrl;
+
+		blk_mq_update_nr_hw_queues(&ctrl->tag_set,
+				ctrl->ctrl.queue_count - 1);
 	}
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
 
-	if (ctrl->queue_count > 1) {
-		nvme_start_queues(&ctrl->ctrl);
-		nvme_queue_scan(&ctrl->ctrl);
-		nvme_queue_async_events(&ctrl->ctrl);
-	}
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	return;
 
@@ -1785,7 +1779,7 @@ static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl)
 	ctrl->tag_set.cmd_size = sizeof(struct nvme_rdma_request) +
 		SG_CHUNK_SIZE * sizeof(struct scatterlist);
 	ctrl->tag_set.driver_data = ctrl;
-	ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+	ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
 	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
 
 	ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
@@ -1863,12 +1857,12 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
 	INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
 
-	ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
+	ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
 	ctrl->ctrl.kato = opts->kato;
 
 	ret = -ENOMEM;
-	ctrl->queues = kcalloc(ctrl->queue_count, sizeof(*ctrl->queues),
+	ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
 				GFP_KERNEL);
 	if (!ctrl->queues)
 		goto out_uninit_ctrl;
@@ -1925,15 +1919,11 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
 	mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-	if (opts->nr_io_queues) {
-		nvme_queue_scan(&ctrl->ctrl);
-		nvme_queue_async_events(&ctrl->ctrl);
-	}
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	return &ctrl->ctrl;
 
 out_remove_admin_queue:
-	nvme_stop_keep_alive(&ctrl->ctrl);
 	nvme_rdma_destroy_admin_queue(ctrl);
 out_kfree_queues:
 	kfree(ctrl->queues);

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 7692a96..1e6dcc2 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c

@@ -1164,18 +1164,24 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
 
 	memset(acc, 0, sizeof(*acc));
 
-	if (iod->rqstdatalen < sizeof(struct fcnvme_ls_cr_assoc_rqst))
+	/*
+	 * FC-NVME spec changes. Some initiators send different lengths
+	 * because the padding size for the Create Association Cmd
+	 * descriptor was incorrect.
+	 * Accept anything of "minimum" length. Assume format per 1.15
+	 * spec (with HOSTID reduced to 16 bytes), ignore how long the
+	 * trailing pad length is.
+	 */
+	if (iod->rqstdatalen < FCNVME_LSDESC_CRA_RQST_MINLEN)
 		ret = VERR_CR_ASSOC_LEN;
-	else if (rqst->desc_list_len !=
-			fcnvme_lsdesc_len(
-				sizeof(struct fcnvme_ls_cr_assoc_rqst)))
+	else if (rqst->desc_list_len <
+			cpu_to_be32(FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN))
 		ret = VERR_CR_ASSOC_RQST_LEN;
 	else if (rqst->assoc_cmd.desc_tag !=
 			cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD))
 		ret = VERR_CR_ASSOC_CMD;
-	else if (rqst->assoc_cmd.desc_len !=
-			fcnvme_lsdesc_len(
-				sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)))
+	else if (rqst->assoc_cmd.desc_len <
+			cpu_to_be32(FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN))
 		ret = VERR_CR_ASSOC_CMD_LEN;
 	else if (!rqst->assoc_cmd.ersp_ratio ||
 		 (be16_to_cpu(rqst->assoc_cmd.ersp_ratio) >=

diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index 4012879..3b4d47a 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c

@@ -85,7 +85,7 @@ static void nvmet_execute_rw(struct nvmet_req *req)
 			bio_set_op_attrs(bio, op, op_flags);
 
 			bio_chain(bio, prev);
-			cookie = submit_bio(prev);
+			submit_bio(prev);
 		}
 
 		sector += sg->length >> 9;

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 5f55c68..717ed7d 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c

@@ -44,12 +44,10 @@ struct nvme_loop_iod {
 
 struct nvme_loop_ctrl {
 	struct nvme_loop_queue	*queues;
-	u32			queue_count;
 
 	struct blk_mq_tag_set	admin_tag_set;
 
 	struct list_head	list;
-	u64			cap;
 	struct blk_mq_tag_set	tag_set;
 	struct nvme_loop_iod	async_event_iod;
 	struct nvme_ctrl	ctrl;
@@ -241,7 +239,7 @@ static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 	struct nvme_loop_ctrl *ctrl = data;
 	struct nvme_loop_queue *queue = &ctrl->queues[hctx_idx + 1];
 
-	BUG_ON(hctx_idx >= ctrl->queue_count);
+	BUG_ON(hctx_idx >= ctrl->ctrl.queue_count);
 
 	hctx->driver_data = queue;
 	return 0;
@@ -307,7 +305,7 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl)
 {
 	int i;
 
-	for (i = 1; i < ctrl->queue_count; i++)
+	for (i = 1; i < ctrl->ctrl.queue_count; i++)
 		nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
 }
 
@@ -330,7 +328,7 @@ static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
 		if (ret)
 			goto out_destroy_queues;
 
-		ctrl->queue_count++;
+		ctrl->ctrl.queue_count++;
 	}
 
 	return 0;
@@ -344,7 +342,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl)
 {
 	int i, ret;
 
-	for (i = 1; i < ctrl->queue_count; i++) {
+	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
 		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
 		if (ret)
 			return ret;
@@ -372,7 +370,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	error = nvmet_sq_init(&ctrl->queues[0].nvme_sq);
 	if (error)
 		return error;
-	ctrl->queue_count = 1;
+	ctrl->ctrl.queue_count = 1;
 
 	error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
 	if (error)
@@ -388,7 +386,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	if (error)
 		goto out_cleanup_queue;
 
-	error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
+	error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
 	if (error) {
 		dev_err(ctrl->ctrl.device,
 			"prop_get NVME_REG_CAP failed\n");
@@ -396,9 +394,9 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize);
 
-	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
+	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 	if (error)
 		goto out_cleanup_queue;
 
@@ -409,8 +407,6 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	if (error)
 		goto out_cleanup_queue;
 
-	nvme_start_keep_alive(&ctrl->ctrl);
-
 	return 0;
 
 out_cleanup_queue:
@@ -424,9 +420,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 
 static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
 {
-	nvme_stop_keep_alive(&ctrl->ctrl);
-
-	if (ctrl->queue_count > 1) {
+	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 					nvme_cancel_request, &ctrl->ctrl);
@@ -436,9 +430,10 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
 	if (ctrl->ctrl.state == NVME_CTRL_LIVE)
 		nvme_shutdown_ctrl(&ctrl->ctrl);
 
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_cancel_request, &ctrl->ctrl);
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_loop_destroy_admin_queue(ctrl);
 }
 
@@ -447,8 +442,10 @@ static void nvme_loop_del_ctrl_work(struct work_struct *work)
 	struct nvme_loop_ctrl *ctrl = container_of(work,
 				struct nvme_loop_ctrl, delete_work);
 
-	nvme_uninit_ctrl(&ctrl->ctrl);
+	nvme_stop_ctrl(&ctrl->ctrl);
+	nvme_remove_namespaces(&ctrl->ctrl);
 	nvme_loop_shutdown_ctrl(ctrl);
+	nvme_uninit_ctrl(&ctrl->ctrl);
 	nvme_put_ctrl(&ctrl->ctrl);
 }
 
@@ -496,6 +493,7 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 	bool changed;
 	int ret;
 
+	nvme_stop_ctrl(&ctrl->ctrl);
 	nvme_loop_shutdown_ctrl(ctrl);
 
 	ret = nvme_loop_configure_admin_queue(ctrl);
@@ -510,13 +508,13 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 	if (ret)
 		goto out_destroy_io;
 
+	blk_mq_update_nr_hw_queues(&ctrl->tag_set,
+			ctrl->ctrl.queue_count - 1);
+
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
 
-	nvme_queue_scan(&ctrl->ctrl);
-	nvme_queue_async_events(&ctrl->ctrl);
-
-	nvme_start_queues(&ctrl->ctrl);
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	return;
 
@@ -559,7 +557,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
 	ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
 		SG_CHUNK_SIZE * sizeof(struct scatterlist);
 	ctrl->tag_set.driver_data = ctrl;
-	ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
+	ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
 	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
 	ctrl->ctrl.tagset = &ctrl->tag_set;
 
@@ -651,10 +649,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 	list_add_tail(&ctrl->list, &nvme_loop_ctrl_list);
 	mutex_unlock(&nvme_loop_ctrl_mutex);
 
-	if (opts->nr_io_queues) {
-		nvme_queue_scan(&ctrl->ctrl);
-		nvme_queue_async_events(&ctrl->ctrl);
-	}
+	nvme_start_ctrl(&ctrl->ctrl);
 
 	return &ctrl->ctrl;
 

diff --git a/drivers/of/property.c b/drivers/of/property.c
index 07c7c36..eda50b4 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c

@@ -804,3 +804,151 @@ struct device_node *of_graph_get_remote_node(const struct device_node *node,
 	return remote;
 }
 EXPORT_SYMBOL(of_graph_get_remote_node);
+
+static void of_fwnode_get(struct fwnode_handle *fwnode)
+{
+	of_node_get(to_of_node(fwnode));
+}
+
+static void of_fwnode_put(struct fwnode_handle *fwnode)
+{
+	of_node_put(to_of_node(fwnode));
+}
+
+static bool of_fwnode_device_is_available(struct fwnode_handle *fwnode)
+{
+	return of_device_is_available(to_of_node(fwnode));
+}
+
+static bool of_fwnode_property_present(struct fwnode_handle *fwnode,
+				       const char *propname)
+{
+	return of_property_read_bool(to_of_node(fwnode), propname);
+}
+
+static int of_fwnode_property_read_int_array(struct fwnode_handle *fwnode,
+					     const char *propname,
+					     unsigned int elem_size, void *val,
+					     size_t nval)
+{
+	struct device_node *node = to_of_node(fwnode);
+
+	if (!val)
+		return of_property_count_elems_of_size(node, propname,
+						       elem_size);
+
+	switch (elem_size) {
+	case sizeof(u8):
+		return of_property_read_u8_array(node, propname, val, nval);
+	case sizeof(u16):
+		return of_property_read_u16_array(node, propname, val, nval);
+	case sizeof(u32):
+		return of_property_read_u32_array(node, propname, val, nval);
+	case sizeof(u64):
+		return of_property_read_u64_array(node, propname, val, nval);
+	}
+
+	return -ENXIO;
+}
+
+static int of_fwnode_property_read_string_array(struct fwnode_handle *fwnode,
+						const char *propname,
+						const char **val, size_t nval)
+{
+	struct device_node *node = to_of_node(fwnode);
+
+	return val ?
+		of_property_read_string_array(node, propname, val, nval) :
+		of_property_count_strings(node, propname);
+}
+
+static struct fwnode_handle *of_fwnode_get_parent(struct fwnode_handle *fwnode)
+{
+	return of_fwnode_handle(of_get_parent(to_of_node(fwnode)));
+}
+
+static struct fwnode_handle *
+of_fwnode_get_next_child_node(struct fwnode_handle *fwnode,
+			      struct fwnode_handle *child)
+{
+	return of_fwnode_handle(of_get_next_available_child(to_of_node(fwnode),
+							    to_of_node(child)));
+}
+
+static struct fwnode_handle *
+of_fwnode_get_named_child_node(struct fwnode_handle *fwnode,
+			       const char *childname)
+{
+	struct device_node *node = to_of_node(fwnode);
+	struct device_node *child;
+
+	for_each_available_child_of_node(node, child)
+		if (!of_node_cmp(child->name, childname))
+			return of_fwnode_handle(child);
+
+	return NULL;
+}
+
+static struct fwnode_handle *
+of_fwnode_graph_get_next_endpoint(struct fwnode_handle *fwnode,
+				  struct fwnode_handle *prev)
+{
+	return of_fwnode_handle(of_graph_get_next_endpoint(to_of_node(fwnode),
+							   to_of_node(prev)));
+}
+
+static struct fwnode_handle *
+of_fwnode_graph_get_remote_endpoint(struct fwnode_handle *fwnode)
+{
+	return of_fwnode_handle(of_parse_phandle(to_of_node(fwnode),
+						 "remote-endpoint", 0));
+}
+
+static struct fwnode_handle *
+of_fwnode_graph_get_port_parent(struct fwnode_handle *fwnode)
+{
+	struct device_node *np;
+
+	/* Get the parent of the port */
+	np = of_get_next_parent(to_of_node(fwnode));
+	if (!np)
+		return NULL;
+
+	/* Is this the "ports" node? If not, it's the port parent. */
+	if (of_node_cmp(np->name, "ports"))
+		return of_fwnode_handle(np);
+
+	return of_fwnode_handle(of_get_next_parent(np));
+}
+
+static int of_fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
+					  struct fwnode_endpoint *endpoint)
+{
+	struct device_node *node = to_of_node(fwnode);
+	struct device_node *port_node = of_get_parent(node);
+
+	endpoint->local_fwnode = fwnode;
+
+	of_property_read_u32(port_node, "reg", &endpoint->port);
+	of_property_read_u32(node, "reg", &endpoint->id);
+
+	of_node_put(port_node);
+
+	return 0;
+}
+
+const struct fwnode_operations of_fwnode_ops = {
+	.get = of_fwnode_get,
+	.put = of_fwnode_put,
+	.device_is_available = of_fwnode_device_is_available,
+	.property_present = of_fwnode_property_present,
+	.property_read_int_array = of_fwnode_property_read_int_array,
+	.property_read_string_array = of_fwnode_property_read_string_array,
+	.get_parent = of_fwnode_get_parent,
+	.get_next_child_node = of_fwnode_get_next_child_node,
+	.get_named_child_node = of_fwnode_get_named_child_node,
+	.graph_get_next_endpoint = of_fwnode_graph_get_next_endpoint,
+	.graph_get_remote_endpoint = of_fwnode_graph_get_remote_endpoint,
+	.graph_get_port_parent = of_fwnode_graph_get_port_parent,
+	.graph_parse_endpoint = of_fwnode_graph_parse_endpoint,
+};

diff --git a/drivers/pci/host/pcie-rockchip.c b/drivers/pci/host/pcie-rockchip.c
index 5acf8694..7bb9870 100644
--- a/drivers/pci/host/pcie-rockchip.c
+++ b/drivers/pci/host/pcie-rockchip.c

@@ -1483,7 +1483,7 @@ static int rockchip_pcie_probe(struct platform_device *pdev)
 	bridge->swizzle_irq = pci_common_swizzle;
 
 	err = pci_scan_root_bus_bridge(bridge);
-	if (!err)
+	if (err < 0)
 		goto err_free_res;
 
 	bus = bridge->bus;

diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig
index 76bdae1..0ad6e29 100644
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig

@@ -49,7 +49,7 @@
 
 config CROS_EC_LPC
         tristate "ChromeOS Embedded Controller (LPC)"
-        depends on MFD_CROS_EC && (X86 || COMPILE_TEST)
+        depends on MFD_CROS_EC && ACPI && (X86 || COMPILE_TEST)
         help
           If you say Y here, you get support for talking to the ChromeOS EC
           over an LPC bus. This uses a simple byte-level protocol with a
@@ -59,6 +59,18 @@
           To compile this driver as a module, choose M here: the
           module will be called cros_ec_lpc.
 
+config CROS_EC_LPC_MEC
+	bool "ChromeOS Embedded Controller LPC Microchip EC (MEC) variant"
+	depends on CROS_EC_LPC
+	default n
+	help
+	  If you say Y here, a variant LPC protocol for the Microchip EC
+	  will be used. Note that this variant is not backward compatible
+	  with non-Microchip ECs.
+
+	  If you have a ChromeOS Embedded Controller Microchip EC variant
+	  choose Y here.
+
 config CROS_EC_PROTO
         bool
         help

diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 4f34627..66c345c 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile

@@ -2,8 +2,11 @@
 obj-$(CONFIG_CHROMEOS_LAPTOP)		+= chromeos_laptop.o
 obj-$(CONFIG_CHROMEOS_PSTORE)		+= chromeos_pstore.o
 cros_ec_devs-objs			:= cros_ec_dev.o cros_ec_sysfs.o \
-					   cros_ec_lightbar.o cros_ec_vbc.o
+					   cros_ec_lightbar.o cros_ec_vbc.o \
+					   cros_ec_debugfs.o
 obj-$(CONFIG_CROS_EC_CHARDEV)		+= cros_ec_devs.o
-obj-$(CONFIG_CROS_EC_LPC)		+= cros_ec_lpc.o
+cros_ec_lpcs-objs			:= cros_ec_lpc.o cros_ec_lpc_reg.o
+cros_ec_lpcs-$(CONFIG_CROS_EC_LPC_MEC)	+= cros_ec_lpc_mec.o
+obj-$(CONFIG_CROS_EC_LPC)		+= cros_ec_lpcs.o
 obj-$(CONFIG_CROS_EC_PROTO)		+= cros_ec_proto.o
 obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT)	+= cros_kbd_led_backlight.o

diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
new file mode 100644
index 0000000..4cc66f4
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_debugfs.c

@@ -0,0 +1,401 @@
+/*
+ * cros_ec_debugfs - debug logs for Chrome OS EC
+ *
+ * Copyright 2015 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/circ_buf.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include "cros_ec_dev.h"
+#include "cros_ec_debugfs.h"
+
+#define LOG_SHIFT		14
+#define LOG_SIZE		(1 << LOG_SHIFT)
+#define LOG_POLL_SEC		10
+
+#define CIRC_ADD(idx, size, value)	(((idx) + (value)) & ((size) - 1))
+
+/* struct cros_ec_debugfs - ChromeOS EC debugging information
+ *
+ * @ec: EC device this debugfs information belongs to
+ * @dir: dentry for debugfs files
+ * @log_buffer: circular buffer for console log information
+ * @read_msg: preallocated EC command and buffer to read console log
+ * @log_mutex: mutex to protect circular buffer
+ * @log_wq: waitqueue for log readers
+ * @log_poll_work: recurring task to poll EC for new console log data
+ * @panicinfo_blob: panicinfo debugfs blob
+ */
+struct cros_ec_debugfs {
+	struct cros_ec_dev *ec;
+	struct dentry *dir;
+	/* EC log */
+	struct circ_buf log_buffer;
+	struct cros_ec_command *read_msg;
+	struct mutex log_mutex;
+	wait_queue_head_t log_wq;
+	struct delayed_work log_poll_work;
+	/* EC panicinfo */
+	struct debugfs_blob_wrapper panicinfo_blob;
+};
+
+/*
+ * We need to make sure that the EC log buffer on the UART is large enough,
+ * so that it is unlikely to overflow within LOG_POLL_SEC.
+ */
+static void cros_ec_console_log_work(struct work_struct *__work)
+{
+	struct cros_ec_debugfs *debug_info =
+		container_of(to_delayed_work(__work),
+			     struct cros_ec_debugfs,
+			     log_poll_work);
+	struct cros_ec_dev *ec = debug_info->ec;
+	struct circ_buf *cb = &debug_info->log_buffer;
+	struct cros_ec_command snapshot_msg = {
+		.command = EC_CMD_CONSOLE_SNAPSHOT + ec->cmd_offset,
+	};
+
+	struct ec_params_console_read_v1 *read_params =
+		(struct ec_params_console_read_v1 *)debug_info->read_msg->data;
+	uint8_t *ec_buffer = (uint8_t *)debug_info->read_msg->data;
+	int idx;
+	int buf_space;
+	int ret;
+
+	ret = cros_ec_cmd_xfer(ec->ec_dev, &snapshot_msg);
+	if (ret < 0) {
+		dev_err(ec->dev, "EC communication failed\n");
+		goto resched;
+	}
+	if (snapshot_msg.result != EC_RES_SUCCESS) {
+		dev_err(ec->dev, "EC failed to snapshot the console log\n");
+		goto resched;
+	}
+
+	/* Loop until we have read everything, or there's an error. */
+	mutex_lock(&debug_info->log_mutex);
+	buf_space = CIRC_SPACE(cb->head, cb->tail, LOG_SIZE);
+
+	while (1) {
+		if (!buf_space) {
+			dev_info_once(ec->dev,
+				      "Some logs may have been dropped...\n");
+			break;
+		}
+
+		memset(read_params, '\0', sizeof(*read_params));
+		read_params->subcmd = CONSOLE_READ_RECENT;
+		ret = cros_ec_cmd_xfer(ec->ec_dev, debug_info->read_msg);
+		if (ret < 0) {
+			dev_err(ec->dev, "EC communication failed\n");
+			break;
+		}
+		if (debug_info->read_msg->result != EC_RES_SUCCESS) {
+			dev_err(ec->dev,
+				"EC failed to read the console log\n");
+			break;
+		}
+
+		/* If the buffer is empty, we're done here. */
+		if (ret == 0 || ec_buffer[0] == '\0')
+			break;
+
+		idx = 0;
+		while (idx < ret && ec_buffer[idx] != '\0' && buf_space > 0) {
+			cb->buf[cb->head] = ec_buffer[idx];
+			cb->head = CIRC_ADD(cb->head, LOG_SIZE, 1);
+			idx++;
+			buf_space--;
+		}
+
+		wake_up(&debug_info->log_wq);
+	}
+
+	mutex_unlock(&debug_info->log_mutex);
+
+resched:
+	schedule_delayed_work(&debug_info->log_poll_work,
+			      msecs_to_jiffies(LOG_POLL_SEC * 1000));
+}
+
+static int cros_ec_console_log_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t cros_ec_console_log_read(struct file *file, char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct cros_ec_debugfs *debug_info = file->private_data;
+	struct circ_buf *cb = &debug_info->log_buffer;
+	ssize_t ret;
+
+	mutex_lock(&debug_info->log_mutex);
+
+	while (!CIRC_CNT(cb->head, cb->tail, LOG_SIZE)) {
+		if (file->f_flags & O_NONBLOCK) {
+			ret = -EAGAIN;
+			goto error;
+		}
+
+		mutex_unlock(&debug_info->log_mutex);
+
+		ret = wait_event_interruptible(debug_info->log_wq,
+					CIRC_CNT(cb->head, cb->tail, LOG_SIZE));
+		if (ret < 0)
+			return ret;
+
+		mutex_lock(&debug_info->log_mutex);
+	}
+
+	/* Only copy until the end of the circular buffer, and let userspace
+	 * retry to get the rest of the data.
+	 */
+	ret = min_t(size_t, CIRC_CNT_TO_END(cb->head, cb->tail, LOG_SIZE),
+		    count);
+
+	if (copy_to_user(buf, cb->buf + cb->tail, ret)) {
+		ret = -EFAULT;
+		goto error;
+	}
+
+	cb->tail = CIRC_ADD(cb->tail, LOG_SIZE, ret);
+
+error:
+	mutex_unlock(&debug_info->log_mutex);
+	return ret;
+}
+
+static unsigned int cros_ec_console_log_poll(struct file *file,
+					     poll_table *wait)
+{
+	struct cros_ec_debugfs *debug_info = file->private_data;
+	unsigned int mask = 0;
+
+	poll_wait(file, &debug_info->log_wq, wait);
+
+	mutex_lock(&debug_info->log_mutex);
+	if (CIRC_CNT(debug_info->log_buffer.head,
+		     debug_info->log_buffer.tail,
+		     LOG_SIZE))
+		mask |= POLLIN | POLLRDNORM;
+	mutex_unlock(&debug_info->log_mutex);
+
+	return mask;
+}
+
+static int cros_ec_console_log_release(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+const struct file_operations cros_ec_console_log_fops = {
+	.owner = THIS_MODULE,
+	.open = cros_ec_console_log_open,
+	.read = cros_ec_console_log_read,
+	.llseek = no_llseek,
+	.poll = cros_ec_console_log_poll,
+	.release = cros_ec_console_log_release,
+};
+
+static int ec_read_version_supported(struct cros_ec_dev *ec)
+{
+	struct ec_params_get_cmd_versions_v1 *params;
+	struct ec_response_get_cmd_versions *response;
+	int ret;
+
+	struct cros_ec_command *msg;
+
+	msg = kzalloc(sizeof(*msg) + max(sizeof(*params), sizeof(*response)),
+		GFP_KERNEL);
+	if (!msg)
+		return 0;
+
+	msg->command = EC_CMD_GET_CMD_VERSIONS + ec->cmd_offset;
+	msg->outsize = sizeof(*params);
+	msg->insize = sizeof(*response);
+
+	params = (struct ec_params_get_cmd_versions_v1 *)msg->data;
+	params->cmd = EC_CMD_CONSOLE_READ;
+	response = (struct ec_response_get_cmd_versions *)msg->data;
+
+	ret = cros_ec_cmd_xfer(ec->ec_dev, msg) >= 0 &&
+		msg->result == EC_RES_SUCCESS &&
+		(response->version_mask & EC_VER_MASK(1));
+
+	kfree(msg);
+
+	return ret;
+}
+
+static int cros_ec_create_console_log(struct cros_ec_debugfs *debug_info)
+{
+	struct cros_ec_dev *ec = debug_info->ec;
+	char *buf;
+	int read_params_size;
+	int read_response_size;
+
+	if (!ec_read_version_supported(ec)) {
+		dev_warn(ec->dev,
+			"device does not support reading the console log\n");
+		return 0;
+	}
+
+	buf = devm_kzalloc(ec->dev, LOG_SIZE, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	read_params_size = sizeof(struct ec_params_console_read_v1);
+	read_response_size = ec->ec_dev->max_response;
+	debug_info->read_msg = devm_kzalloc(ec->dev,
+		sizeof(*debug_info->read_msg) +
+			max(read_params_size, read_response_size), GFP_KERNEL);
+	if (!debug_info->read_msg)
+		return -ENOMEM;
+
+	debug_info->read_msg->version = 1;
+	debug_info->read_msg->command = EC_CMD_CONSOLE_READ + ec->cmd_offset;
+	debug_info->read_msg->outsize = read_params_size;
+	debug_info->read_msg->insize = read_response_size;
+
+	debug_info->log_buffer.buf = buf;
+	debug_info->log_buffer.head = 0;
+	debug_info->log_buffer.tail = 0;
+
+	mutex_init(&debug_info->log_mutex);
+	init_waitqueue_head(&debug_info->log_wq);
+
+	if (!debugfs_create_file("console_log",
+				 S_IFREG | S_IRUGO,
+				 debug_info->dir,
+				 debug_info,
+				 &cros_ec_console_log_fops))
+		return -ENOMEM;
+
+	INIT_DELAYED_WORK(&debug_info->log_poll_work,
+			  cros_ec_console_log_work);
+	schedule_delayed_work(&debug_info->log_poll_work, 0);
+
+	return 0;
+}
+
+static void cros_ec_cleanup_console_log(struct cros_ec_debugfs *debug_info)
+{
+	if (debug_info->log_buffer.buf) {
+		cancel_delayed_work_sync(&debug_info->log_poll_work);
+		mutex_destroy(&debug_info->log_mutex);
+	}
+}
+
+static int cros_ec_create_panicinfo(struct cros_ec_debugfs *debug_info)
+{
+	struct cros_ec_device *ec_dev = debug_info->ec->ec_dev;
+	int ret;
+	struct cros_ec_command *msg;
+	int insize;
+
+	insize = ec_dev->max_response;
+
+	msg = devm_kzalloc(debug_info->ec->dev,
+			sizeof(*msg) + insize, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	msg->command = EC_CMD_GET_PANIC_INFO;
+	msg->insize = insize;
+
+	ret = cros_ec_cmd_xfer(ec_dev, msg);
+	if (ret < 0) {
+		dev_warn(debug_info->ec->dev, "Cannot read panicinfo.\n");
+		ret = 0;
+		goto free;
+	}
+
+	/* No panic data */
+	if (ret == 0)
+		goto free;
+
+	debug_info->panicinfo_blob.data = msg->data;
+	debug_info->panicinfo_blob.size = ret;
+
+	if (!debugfs_create_blob("panicinfo",
+				 S_IFREG | S_IRUGO,
+				 debug_info->dir,
+				 &debug_info->panicinfo_blob)) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	return 0;
+
+free:
+	devm_kfree(debug_info->ec->dev, msg);
+	return ret;
+}
+
+int cros_ec_debugfs_init(struct cros_ec_dev *ec)
+{
+	struct cros_ec_platform *ec_platform = dev_get_platdata(ec->dev);
+	const char *name = ec_platform->ec_name;
+	struct cros_ec_debugfs *debug_info;
+	int ret;
+
+	debug_info = devm_kzalloc(ec->dev, sizeof(*debug_info), GFP_KERNEL);
+	if (!debug_info)
+		return -ENOMEM;
+
+	debug_info->ec = ec;
+	debug_info->dir = debugfs_create_dir(name, NULL);
+	if (!debug_info->dir)
+		return -ENOMEM;
+
+	ret = cros_ec_create_panicinfo(debug_info);
+	if (ret)
+		goto remove_debugfs;
+
+	ret = cros_ec_create_console_log(debug_info);
+	if (ret)
+		goto remove_debugfs;
+
+	ec->debug_info = debug_info;
+
+	return 0;
+
+remove_debugfs:
+	debugfs_remove_recursive(debug_info->dir);
+	return ret;
+}
+
+void cros_ec_debugfs_remove(struct cros_ec_dev *ec)
+{
+	if (!ec->debug_info)
+		return;
+
+	debugfs_remove_recursive(ec->debug_info->dir);
+	cros_ec_cleanup_console_log(ec->debug_info);
+}

diff --git a/drivers/platform/chrome/cros_ec_debugfs.h b/drivers/platform/chrome/cros_ec_debugfs.h
new file mode 100644
index 0000000..1ff3a50
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_debugfs.h

@@ -0,0 +1,27 @@
+/*
+ * Copyright 2015 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _DRV_CROS_EC_DEBUGFS_H_
+#define _DRV_CROS_EC_DEBUGFS_H_
+
+#include "cros_ec_dev.h"
+
+/* debugfs stuff */
+
+/*
+ * Create the debugfs directory and its entries for @ec.
+ * Returns 0 on success or a negative errno.
+ */
+int cros_ec_debugfs_init(struct cros_ec_dev *ec);
+
+/* Remove what cros_ec_debugfs_init() created; no-op if nothing was set up. */
+void cros_ec_debugfs_remove(struct cros_ec_dev *ec);
+
+#endif  /* _DRV_CROS_EC_DEBUGFS_H_ */

diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c
index 6aa120c..cf6c4f0 100644
--- a/drivers/platform/chrome/cros_ec_dev.c
+++ b/drivers/platform/chrome/cros_ec_dev.c

@@ -21,9 +21,11 @@
 #include <linux/mfd/core.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/pm.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
+#include "cros_ec_debugfs.h"
 #include "cros_ec_dev.h"
 
 /* Device variables */
@@ -427,10 +429,16 @@ static int ec_device_probe(struct platform_device *pdev)
 		goto failed;
 	}
 
+	if (cros_ec_debugfs_init(ec))
+		dev_warn(dev, "failed to create debugfs directory\n");
+
 	/* check whether this EC is a sensor hub. */
 	if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE))
 		cros_ec_sensors_register(ec);
 
+	/* Take control of the lightbar from the EC. */
+	lb_manual_suspend_ctrl(ec, 1);
+
 	return 0;
 
 failed:
@@ -441,6 +449,12 @@ static int ec_device_probe(struct platform_device *pdev)
 static int ec_device_remove(struct platform_device *pdev)
 {
 	struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev);
+
+	/* Let the EC take over the lightbar again. */
+	lb_manual_suspend_ctrl(ec, 0);
+
+	cros_ec_debugfs_remove(ec);
+
 	cdev_del(&ec->cdev);
 	device_unregister(&ec->class_dev);
 	return 0;
@@ -452,9 +466,35 @@ static const struct platform_device_id cros_ec_id[] = {
 };
 MODULE_DEVICE_TABLE(platform, cros_ec_id);
 
+/*
+ * Suspend callback: pass the driver data to lb_suspend(). The result of
+ * lb_suspend() is ignored, so this always reports success.
+ */
+static __maybe_unused int ec_device_suspend(struct device *dev)
+{
+	lb_suspend(dev_get_drvdata(dev));
+
+	return 0;
+}
+
+/*
+ * Resume callback: pass the driver data to lb_resume(). The result of
+ * lb_resume() is ignored, so this always reports success.
+ */
+static __maybe_unused int ec_device_resume(struct device *dev)
+{
+	lb_resume(dev_get_drvdata(dev));
+
+	return 0;
+}
+
+/*
+ * PM operations for the cros-ec-ctl platform driver. The suspend/resume
+ * callbacks are only filled in when CONFIG_PM_SLEEP is set; otherwise the
+ * structure is left empty.
+ */
+static const struct dev_pm_ops cros_ec_dev_pm_ops = {
+#ifdef CONFIG_PM_SLEEP
+	.suspend = ec_device_suspend,
+	.resume = ec_device_resume,
+#endif
+};
+
 static struct platform_driver cros_ec_dev_driver = {
 	.driver = {
 		.name = "cros-ec-ctl",
+		.pm = &cros_ec_dev_pm_ops,
 	},
 	.probe = ec_device_probe,
 	.remove = ec_device_remove,

diff --git a/drivers/platform/chrome/cros_ec_dev.h b/drivers/platform/chrome/cros_ec_dev.h
index bfd2c84..45e9453 100644
--- a/drivers/platform/chrome/cros_ec_dev.h
+++ b/drivers/platform/chrome/cros_ec_dev.h

@@ -43,4 +43,10 @@ struct cros_ec_readmem {
 #define CROS_EC_DEV_IOCXCMD   _IOWR(CROS_EC_DEV_IOC, 0, struct cros_ec_command)
 #define CROS_EC_DEV_IOCRDMEM  _IOWR(CROS_EC_DEV_IOC, 1, struct cros_ec_readmem)
 
+/* Lightbar utilities */
+
+/* True when get_lightbar_version() returns non-zero for this EC. */
+extern bool ec_has_lightbar(struct cros_ec_dev *ec);
+/*
+ * Send LIGHTBAR_CMD_MANUAL_SUSPEND_CTRL with @enable. Returns 0 without
+ * sending anything when @ec is not the EC recorded as having a lightbar.
+ */
+extern int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable);
+/*
+ * Send LIGHTBAR_CMD_SUSPEND / LIGHTBAR_CMD_RESUME. Both return 0 without
+ * sending anything when userspace control is enabled or @ec is not the EC
+ * recorded as having a lightbar.
+ */
+extern int lb_suspend(struct cros_ec_dev *ec);
+extern int lb_resume(struct cros_ec_dev *ec);
+
 #endif /* _CROS_EC_DEV_H_ */

diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c
index 8df3d44..fd2b047 100644
--- a/drivers/platform/chrome/cros_ec_lightbar.c
+++ b/drivers/platform/chrome/cros_ec_lightbar.c

@@ -38,6 +38,13 @@
 /* Rate-limit the lightbar interface to prevent DoS. */
 static unsigned long lb_interval_jiffies = 50 * HZ / 1000;
 
+/*
+ * Whether or not we have given userspace control of the lightbar.
+ * If this is true, we won't do anything during suspend/resume.
+ */
+static bool userspace_control;
+/*
+ * The EC found to have a lightbar; assigned from the attribute visibility
+ * callback. The lb_* suspend/resume helpers return early for any other EC.
+ */
+static struct cros_ec_dev *ec_with_lightbar;
+
 static ssize_t interval_msec_show(struct device *dev,
 				  struct device_attribute *attr, char *buf)
 {
@@ -295,7 +302,8 @@ static ssize_t led_rgb_store(struct device *dev, struct device_attribute *attr,
 
 static char const *seqname[] = {
 	"ERROR", "S5", "S3", "S0", "S5S3", "S3S0",
-	"S0S3", "S3S5", "STOP", "RUN", "PULSE", "TEST", "KONAMI",
+	"S0S3", "S3S5", "STOP", "RUN", "KONAMI",
+	"TAP", "PROGRAM",
 };
 
 static ssize_t sequence_show(struct device *dev,
@@ -340,6 +348,89 @@ static ssize_t sequence_show(struct device *dev,
 	return ret;
 }
 
+/*
+ * Send a lightbar command that has no parameters besides the command id.
+ *
+ * Returns 0 on success, -ENOMEM if the message cannot be allocated, the
+ * non-zero result of lb_throttle(), the negative result of
+ * cros_ec_cmd_xfer(), or -EINVAL when the EC result is not EC_RES_SUCCESS.
+ */
+static int lb_send_empty_cmd(struct cros_ec_dev *ec, uint8_t cmd)
+{
+	struct cros_ec_command *msg = alloc_lightbar_cmd_msg(ec);
+	int err;
+
+	if (!msg)
+		return -ENOMEM;
+
+	((struct ec_params_lightbar *)msg->data)->cmd = cmd;
+
+	err = lb_throttle();
+	if (err)
+		goto out;
+
+	err = cros_ec_cmd_xfer(ec->ec_dev, msg);
+	if (err >= 0)
+		err = (msg->result == EC_RES_SUCCESS) ? 0 : -EINVAL;
+out:
+	kfree(msg);
+
+	return err;
+}
+
+/*
+ * lb_manual_suspend_ctrl - send LIGHTBAR_CMD_MANUAL_SUSPEND_CTRL to the EC
+ *
+ * @ec:     EC character device
+ * @enable: value stored in the command's manual_suspend_ctrl.enable field
+ *
+ * Returns 0 immediately when @ec is not the EC recorded in ec_with_lightbar.
+ * Otherwise returns 0 on success, -ENOMEM if the message cannot be
+ * allocated, the non-zero result of lb_throttle(), the negative result of
+ * cros_ec_cmd_xfer(), or -EINVAL when the EC result is not EC_RES_SUCCESS.
+ */
+int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable)
+{
+	struct ec_params_lightbar *lb;
+	struct cros_ec_command *msg;
+	int err;
+
+	if (ec != ec_with_lightbar)
+		return 0;
+
+	msg = alloc_lightbar_cmd_msg(ec);
+	if (!msg)
+		return -ENOMEM;
+
+	lb = (struct ec_params_lightbar *)msg->data;
+	lb->cmd = LIGHTBAR_CMD_MANUAL_SUSPEND_CTRL;
+	lb->manual_suspend_ctrl.enable = enable;
+
+	err = lb_throttle();
+	if (err)
+		goto out;
+
+	err = cros_ec_cmd_xfer(ec->ec_dev, msg);
+	if (err >= 0)
+		err = (msg->result == EC_RES_SUCCESS) ? 0 : -EINVAL;
+out:
+	kfree(msg);
+
+	return err;
+}
+
+/*
+ * Send LIGHTBAR_CMD_SUSPEND, but only to the EC recorded as having a
+ * lightbar and only while userspace has not taken control. Returns 0 when
+ * nothing is sent, otherwise the result of lb_send_empty_cmd().
+ */
+int lb_suspend(struct cros_ec_dev *ec)
+{
+	if (!userspace_control && ec == ec_with_lightbar)
+		return lb_send_empty_cmd(ec, LIGHTBAR_CMD_SUSPEND);
+
+	return 0;
+}
+
+/*
+ * Send LIGHTBAR_CMD_RESUME, but only to the EC recorded as having a
+ * lightbar and only while userspace has not taken control. Returns 0 when
+ * nothing is sent, otherwise the result of lb_send_empty_cmd().
+ */
+int lb_resume(struct cros_ec_dev *ec)
+{
+	if (!userspace_control && ec == ec_with_lightbar)
+		return lb_send_empty_cmd(ec, LIGHTBAR_CMD_RESUME);
+
+	return 0;
+}
+
 static ssize_t sequence_store(struct device *dev, struct device_attribute *attr,
 			      const char *buf, size_t count)
 {
@@ -390,6 +481,93 @@ static ssize_t sequence_store(struct device *dev, struct device_attribute *attr,
 	return ret;
 }
 
+/*
+ * sysfs store handler for the "program" attribute: upload a lightbar
+ * program to the EC with LIGHTBAR_CMD_SET_PROGRAM.
+ *
+ * @buf / @count: the raw program bytes written by userspace.
+ *
+ * Returns @count on success, -EINVAL if the program does not fit or the EC
+ * result is not EC_RES_SUCCESS, -ENOMEM if the message cannot be allocated,
+ * or the error from lb_throttle() / cros_ec_cmd_xfer().
+ */
+static ssize_t program_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	int extra_bytes, max_size, ret;
+	struct ec_params_lightbar *param;
+	struct cros_ec_command *msg;
+	struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev,
+					      class_dev);
+
+	/*
+	 * We might need to reject the program for size reasons. The EC
+	 * enforces a maximum program size, but we also don't want to try
+	 * and send a program that is too big for the protocol. In order
+	 * to ensure the latter, we also need to ensure we have extra bytes
+	 * to represent the rest of the packet.
+	 */
+	extra_bytes = sizeof(*param) - sizeof(param->set_program.data);
+	max_size = min(EC_LB_PROG_LEN, ec->ec_dev->max_request - extra_bytes);
+
+	/*
+	 * max_size is signed and goes negative when max_request is smaller
+	 * than the fixed part of the packet. Test for that explicitly: in
+	 * "count > max_size" a negative max_size is converted to a huge
+	 * unsigned value and the check would let any program through.
+	 */
+	if (max_size < 0 || count > max_size) {
+		dev_err(dev, "Program is %zu bytes, too long to send (max: %d)\n",
+			count, max_size);
+
+		return -EINVAL;
+	}
+
+	msg = alloc_lightbar_cmd_msg(ec);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = lb_throttle();
+	if (ret)
+		goto exit;
+
+	dev_info(dev, "Copying %zu byte program to EC\n", count);
+
+	param = (struct ec_params_lightbar *)msg->data;
+	param->cmd = LIGHTBAR_CMD_SET_PROGRAM;
+
+	param->set_program.size = count;
+	memcpy(param->set_program.data, buf, count);
+
+	/*
+	 * We need to set the message size manually or else it will use
+	 * EC_LB_PROG_LEN. This might be too long, and the program
+	 * is unlikely to use all of the space.
+	 */
+	msg->outsize = count + extra_bytes;
+
+	ret = cros_ec_cmd_xfer(ec->ec_dev, msg);
+	if (ret < 0)
+		goto exit;
+	if (msg->result != EC_RES_SUCCESS) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	/* A sysfs store reports the number of bytes it consumed */
+	ret = count;
+exit:
+	kfree(msg);
+
+	return ret;
+}
+
+/*
+ * sysfs show handler for "userspace_control": prints the current flag as
+ * a decimal integer followed by a newline.
+ */
+static ssize_t userspace_control_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%d\n", userspace_control);
+}
+
+/*
+ * sysfs store handler for "userspace_control": parse @buf with strtobool()
+ * and record the result in the userspace_control flag.
+ *
+ * Returns @count on success or the negative error from strtobool().
+ */
+static ssize_t userspace_control_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf,
+				       size_t count)
+{
+	bool value;
+	int err = strtobool(buf, &value);
+
+	if (err < 0)
+		return err;
+
+	userspace_control = value;
+
+	return count;
+}
+
 /* Module initialization */
 
 static DEVICE_ATTR_RW(interval_msec);
@@ -397,15 +575,25 @@ static DEVICE_ATTR_RO(version);
 static DEVICE_ATTR_WO(brightness);
 static DEVICE_ATTR_WO(led_rgb);
 static DEVICE_ATTR_RW(sequence);
+static DEVICE_ATTR_WO(program);
+static DEVICE_ATTR_RW(userspace_control);
+
 static struct attribute *__lb_cmds_attrs[] = {
 	&dev_attr_interval_msec.attr,
 	&dev_attr_version.attr,
 	&dev_attr_brightness.attr,
 	&dev_attr_led_rgb.attr,
 	&dev_attr_sequence.attr,
+	&dev_attr_program.attr,
+	&dev_attr_userspace_control.attr,
 	NULL,
 };
 
+/*
+ * ec_has_lightbar - report whether get_lightbar_version() gives a non-zero
+ * result for @ec. The version and flags outputs are not requested.
+ */
+bool ec_has_lightbar(struct cros_ec_dev *ec)
+{
+	return get_lightbar_version(ec, NULL, NULL) != 0;
+}
+
 static umode_t cros_ec_lightbar_attrs_are_visible(struct kobject *kobj,
 						  struct attribute *a, int n)
 {
@@ -422,10 +610,11 @@ static umode_t cros_ec_lightbar_attrs_are_visible(struct kobject *kobj,
 		return 0;
 
 	/* Only instantiate this stuff if the EC has a lightbar */
-	if (get_lightbar_version(ec, NULL, NULL))
+	if (ec_has_lightbar(ec)) {
+		ec_with_lightbar = ec;
 		return a->mode;
-	else
-		return 0;
+	}
+	return 0;
 }
 
 struct attribute_group cros_ec_lightbar_attr_group = {

diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index f9a2454..2b6436d 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c

@@ -21,24 +21,29 @@
  * expensive.
  */
 
+#include <linux/acpi.h>
 #include <linux/dmi.h>
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/mfd/cros_ec.h>
 #include <linux/mfd/cros_ec_commands.h>
+#include <linux/mfd/cros_ec_lpc_reg.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
 
-#define DRV_NAME "cros_ec_lpc"
+#define DRV_NAME "cros_ec_lpcs"
+#define ACPI_DRV_NAME "GOOG0004"
 
 static int ec_response_timed_out(void)
 {
 	unsigned long one_second = jiffies + HZ;
+	u8 data;
 
 	usleep_range(200, 300);
 	do {
-		if (!(inb(EC_LPC_ADDR_HOST_CMD) & EC_LPC_STATUS_BUSY_MASK))
+		if (!(cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_CMD, 1, &data) &
+		    EC_LPC_STATUS_BUSY_MASK))
 			return 0;
 		usleep_range(100, 200);
 	} while (time_before(jiffies, one_second));
@@ -51,21 +56,20 @@ static int cros_ec_pkt_xfer_lpc(struct cros_ec_device *ec,
 {
 	struct ec_host_request *request;
 	struct ec_host_response response;
-	u8 sum = 0;
-	int i;
+	u8 sum;
 	int ret = 0;
 	u8 *dout;
 
 	ret = cros_ec_prepare_tx(ec, msg);
 
 	/* Write buffer */
-	for (i = 0; i < ret; i++)
-		outb(ec->dout[i], EC_LPC_ADDR_HOST_PACKET + i);
+	cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_PACKET, ret, ec->dout);
 
 	request = (struct ec_host_request *)ec->dout;
 
 	/* Here we go */
-	outb(EC_COMMAND_PROTOCOL_3, EC_LPC_ADDR_HOST_CMD);
+	sum = EC_COMMAND_PROTOCOL_3;
+	cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_CMD, 1, &sum);
 
 	if (ec_response_timed_out()) {
 		dev_warn(ec->dev, "EC responsed timed out\n");
@@ -74,17 +78,15 @@ static int cros_ec_pkt_xfer_lpc(struct cros_ec_device *ec,
 	}
 
 	/* Check result */
-	msg->result = inb(EC_LPC_ADDR_HOST_DATA);
+	msg->result = cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_DATA, 1, &sum);
 	ret = cros_ec_check_result(ec, msg);
 	if (ret)
 		goto done;
 
 	/* Read back response */
 	dout = (u8 *)&response;
-	for (i = 0; i < sizeof(response); i++) {
-		dout[i] = inb(EC_LPC_ADDR_HOST_PACKET + i);
-		sum += dout[i];
-	}
+	sum = cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_PACKET, sizeof(response),
+				     dout);
 
 	msg->result = response.result;
 
@@ -97,11 +99,9 @@ static int cros_ec_pkt_xfer_lpc(struct cros_ec_device *ec,
 	}
 
 	/* Read response and process checksum */
-	for (i = 0; i < response.data_len; i++) {
-		msg->data[i] =
-			inb(EC_LPC_ADDR_HOST_PACKET + sizeof(response) + i);
-		sum += msg->data[i];
-	}
+	sum += cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_PACKET +
+				      sizeof(response), response.data_len,
+				      msg->data);
 
 	if (sum) {
 		dev_err(ec->dev,
@@ -121,8 +121,7 @@ static int cros_ec_cmd_xfer_lpc(struct cros_ec_device *ec,
 				struct cros_ec_command *msg)
 {
 	struct ec_lpc_host_args args;
-	int csum;
-	int i;
+	u8 sum;
 	int ret = 0;
 
 	if (msg->outsize > EC_PROTO2_MAX_PARAM_SIZE ||
@@ -139,24 +138,20 @@ static int cros_ec_cmd_xfer_lpc(struct cros_ec_device *ec,
 	args.data_size = msg->outsize;
 
 	/* Initialize checksum */
-	csum = msg->command + args.flags +
-		args.command_version + args.data_size;
+	sum = msg->command + args.flags + args.command_version + args.data_size;
 
 	/* Copy data and update checksum */
-	for (i = 0; i < msg->outsize; i++) {
-		outb(msg->data[i], EC_LPC_ADDR_HOST_PARAM + i);
-		csum += msg->data[i];
-	}
+	sum += cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_PARAM, msg->outsize,
+				       msg->data);
 
 	/* Finalize checksum and write args */
-	args.checksum = csum & 0xFF;
-	outb(args.flags, EC_LPC_ADDR_HOST_ARGS);
-	outb(args.command_version, EC_LPC_ADDR_HOST_ARGS + 1);
-	outb(args.data_size, EC_LPC_ADDR_HOST_ARGS + 2);
-	outb(args.checksum, EC_LPC_ADDR_HOST_ARGS + 3);
+	args.checksum = sum;
+	cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_ARGS, sizeof(args),
+				(u8 *)&args);
 
 	/* Here we go */
-	outb(msg->command, EC_LPC_ADDR_HOST_CMD);
+	sum = msg->command;
+	cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_CMD, 1, &sum);
 
 	if (ec_response_timed_out()) {
 		dev_warn(ec->dev, "EC responsed timed out\n");
@@ -165,16 +160,14 @@ static int cros_ec_cmd_xfer_lpc(struct cros_ec_device *ec,
 	}
 
 	/* Check result */
-	msg->result = inb(EC_LPC_ADDR_HOST_DATA);
+	msg->result = cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_DATA, 1, &sum);
 	ret = cros_ec_check_result(ec, msg);
 	if (ret)
 		goto done;
 
 	/* Read back args */
-	args.flags = inb(EC_LPC_ADDR_HOST_ARGS);
-	args.command_version = inb(EC_LPC_ADDR_HOST_ARGS + 1);
-	args.data_size = inb(EC_LPC_ADDR_HOST_ARGS + 2);
-	args.checksum = inb(EC_LPC_ADDR_HOST_ARGS + 3);
+	cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_ARGS, sizeof(args),
+			       (u8 *)&args);
 
 	if (args.data_size > msg->insize) {
 		dev_err(ec->dev,
@@ -185,20 +178,17 @@ static int cros_ec_cmd_xfer_lpc(struct cros_ec_device *ec,
 	}
 
 	/* Start calculating response checksum */
-	csum = msg->command + args.flags +
-		args.command_version + args.data_size;
+	sum = msg->command + args.flags + args.command_version + args.data_size;
 
 	/* Read response and update checksum */
-	for (i = 0; i < args.data_size; i++) {
-		msg->data[i] = inb(EC_LPC_ADDR_HOST_PARAM + i);
-		csum += msg->data[i];
-	}
+	sum += cros_ec_lpc_read_bytes(EC_LPC_ADDR_HOST_PARAM, args.data_size,
+				      msg->data);
 
 	/* Verify checksum */
-	if (args.checksum != (csum & 0xFF)) {
+	if (args.checksum != sum) {
 		dev_err(ec->dev,
 			"bad packet checksum, expected %02x, got %02x\n",
-			args.checksum, csum & 0xFF);
+			args.checksum, sum);
 		ret = -EBADMSG;
 		goto done;
 	}
@@ -222,14 +212,13 @@ static int cros_ec_lpc_readmem(struct cros_ec_device *ec, unsigned int offset,
 
 	/* fixed length */
 	if (bytes) {
-		for (; cnt < bytes; i++, s++, cnt++)
-			*s = inb(EC_LPC_ADDR_MEMMAP + i);
-		return cnt;
+		cros_ec_lpc_read_bytes(EC_LPC_ADDR_MEMMAP + offset, bytes, s);
+		return bytes;
 	}
 
 	/* string */
 	for (; i < EC_MEMMAP_SIZE; i++, s++) {
-		*s = inb(EC_LPC_ADDR_MEMMAP + i);
+		cros_ec_lpc_read_bytes(EC_LPC_ADDR_MEMMAP + i, 1, s);
 		cnt++;
 		if (!*s)
 			break;
@@ -238,10 +227,23 @@ static int cros_ec_lpc_readmem(struct cros_ec_device *ec, unsigned int offset,
 	return cnt;
 }
 
+/*
+ * ACPI notify handler. @data is the cros_ec_device passed when the handler
+ * was installed. If the EC supports MKBP events and fetching the next event
+ * returns a positive value, run the event notifier chain with the device as
+ * payload. @device and @value are unused.
+ */
+static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
+{
+	struct cros_ec_device *ec_dev = data;
+
+	if (!ec_dev->mkbp_event_supported)
+		return;
+
+	if (cros_ec_get_next_event(ec_dev, NULL) <= 0)
+		return;
+
+	blocking_notifier_call_chain(&ec_dev->event_notifier, 0, ec_dev);
+}
+
 static int cros_ec_lpc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
+	struct acpi_device *adev;
+	acpi_status status;
 	struct cros_ec_device *ec_dev;
+	u8 buf[2];
 	int ret;
 
 	if (!devm_request_region(dev, EC_LPC_ADDR_MEMMAP, EC_MEMMAP_SIZE,
@@ -250,8 +252,8 @@ static int cros_ec_lpc_probe(struct platform_device *pdev)
 		return -EBUSY;
 	}
 
-	if ((inb(EC_LPC_ADDR_MEMMAP + EC_MEMMAP_ID) != 'E') ||
-	    (inb(EC_LPC_ADDR_MEMMAP + EC_MEMMAP_ID + 1) != 'C')) {
+	cros_ec_lpc_read_bytes(EC_LPC_ADDR_MEMMAP + EC_MEMMAP_ID, 2, buf);
+	if (buf[0] != 'E' || buf[1] != 'C') {
 		dev_err(dev, "EC ID not detected\n");
 		return -ENODEV;
 	}
@@ -287,12 +289,33 @@ static int cros_ec_lpc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	/*
+	 * Connect a notify handler to process MKBP messages if we have a
+	 * companion ACPI device.
+	 */
+	adev = ACPI_COMPANION(dev);
+	if (adev) {
+		status = acpi_install_notify_handler(adev->handle,
+						     ACPI_ALL_NOTIFY,
+						     cros_ec_lpc_acpi_notify,
+						     ec_dev);
+		if (ACPI_FAILURE(status))
+			dev_warn(dev, "Failed to register notifier %08x\n",
+				 status);
+	}
+
 	return 0;
 }
 
 static int cros_ec_lpc_remove(struct platform_device *pdev)
 {
 	struct cros_ec_device *ec_dev;
+	struct acpi_device *adev;
+
+	adev = ACPI_COMPANION(&pdev->dev);
+	if (adev)
+		acpi_remove_notify_handler(adev->handle, ACPI_ALL_NOTIFY,
+					   cros_ec_lpc_acpi_notify);
 
 	ec_dev = platform_get_drvdata(pdev);
 	cros_ec_remove(ec_dev);
@@ -300,6 +323,12 @@ static int cros_ec_lpc_remove(struct platform_device *pdev)
 	return 0;
 }
 
+/*
+ * ACPI IDs this driver binds to (used as the driver's acpi_match_table).
+ * The empty entry terminates the table.
+ */
+static const struct acpi_device_id cros_ec_lpc_acpi_device_ids[] = {
+	{ ACPI_DRV_NAME, 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, cros_ec_lpc_acpi_device_ids);
+
 static struct dmi_system_id cros_ec_lpc_dmi_table[] __initdata = {
 	{
 		/*
@@ -337,18 +366,36 @@ static struct dmi_system_id cros_ec_lpc_dmi_table[] __initdata = {
 };
 MODULE_DEVICE_TABLE(dmi, cros_ec_lpc_dmi_table);
 
+#ifdef CONFIG_PM_SLEEP
+/* Suspend callback: forward the device's driver data to cros_ec_suspend(). */
+static int cros_ec_lpc_suspend(struct device *dev)
+{
+	return cros_ec_suspend(dev_get_drvdata(dev));
+}
+
+/* Resume callback: forward the device's driver data to cros_ec_resume(). */
+static int cros_ec_lpc_resume(struct device *dev)
+{
+	return cros_ec_resume(dev_get_drvdata(dev));
+}
+#endif
+
+/*
+ * PM operations for the LPC transport, installed through
+ * SET_LATE_SYSTEM_SLEEP_PM_OPS. Only cros_ec_lpc_driver in this file refers
+ * to it, so it is file-local rather than a global symbol.
+ */
+static const struct dev_pm_ops cros_ec_lpc_pm_ops = {
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(cros_ec_lpc_suspend, cros_ec_lpc_resume)
+};
+
 static struct platform_driver cros_ec_lpc_driver = {
 	.driver = {
 		.name = DRV_NAME,
+		.acpi_match_table = cros_ec_lpc_acpi_device_ids,
+		.pm = &cros_ec_lpc_pm_ops,
 	},
 	.probe = cros_ec_lpc_probe,
 	.remove = cros_ec_lpc_remove,
 };
 
-static struct platform_device cros_ec_lpc_device = {
-	.name = DRV_NAME
-};
-
 static int __init cros_ec_lpc_init(void)
 {
 	int ret;
@@ -358,18 +405,13 @@ static int __init cros_ec_lpc_init(void)
 		return -ENODEV;
 	}
 
+	cros_ec_lpc_reg_init();
+
 	/* Register the driver */
 	ret = platform_driver_register(&cros_ec_lpc_driver);
 	if (ret) {
 		pr_err(DRV_NAME ": can't register driver: %d\n", ret);
-		return ret;
-	}
-
-	/* Register the device, and it'll get hooked up automatically */
-	ret = platform_device_register(&cros_ec_lpc_device);
-	if (ret) {
-		pr_err(DRV_NAME ": can't register device: %d\n", ret);
-		platform_driver_unregister(&cros_ec_lpc_driver);
+		cros_ec_lpc_reg_destroy();
 		return ret;
 	}
 
@@ -378,8 +420,8 @@ static int __init cros_ec_lpc_init(void)
 
 static void __exit cros_ec_lpc_exit(void)
 {
-	platform_device_unregister(&cros_ec_lpc_device);
 	platform_driver_unregister(&cros_ec_lpc_driver);
+	cros_ec_lpc_reg_destroy();
 }
 
 module_init(cros_ec_lpc_init);

diff --git a/drivers/platform/chrome/cros_ec_lpc_mec.c b/drivers/platform/chrome/cros_ec_lpc_mec.c
new file mode 100644
index 0000000..2eda2c2
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_lpc_mec.c

@@ -0,0 +1,140 @@
+/*
+ * cros_ec_lpc_mec - LPC variant I/O for Microchip EC
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/mfd/cros_ec_lpc_mec.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+/*
+ * This mutex must be held while accessing the EMI unit. We can't rely on the
+ * EC mutex because memmap data may be accessed without it being held.
+ *
+ * It is initialised by cros_ec_lpc_mec_init() and destroyed by
+ * cros_ec_lpc_mec_destroy().
+ */
+static struct mutex io_mutex;
+
+/*
+ * cros_ec_lpc_mec_emi_write_address
+ *
+ * Program the EMI address registers for a following read / write.
+ *
+ * @addr:        Starting read / write address
+ * @access_type: Type of access, typically 32-bit auto-increment
+ *
+ * The access type is OR-ed into the low address byte, whose two lowest
+ * address bits are masked off.
+ */
+static void cros_ec_lpc_mec_emi_write_address(u16 addr,
+			enum cros_ec_lpc_mec_emi_access_mode access_type)
+{
+	/* The EMI unit takes the address as an offset into its own range */
+	u16 rel = addr - MEC_EMI_RANGE_START;
+
+	outb((rel & 0xfc) | access_type, MEC_EMI_EC_ADDRESS_B0);
+	outb((rel >> 8) & 0x7f, MEC_EMI_EC_ADDRESS_B1);
+}
+
+/*
+ * cros_ec_lpc_io_bytes_mec - Read / write bytes to MEC EMI port
+ *
+ * @io_type: MEC_IO_READ or MEC_IO_WRITE, depending on request
+ * @offset:  Base read / write address
+ * @length:  Number of bytes to read / write
+ * @buf:     Destination / source buffer
+ *
+ * The whole transfer is done with io_mutex held. Data moves through the
+ * four data ports B0..B3; after each pass over them the EMI address is
+ * rewritten unless the access mode stays long auto-increment.
+ *
+ * @return 8-bit checksum of all bytes read / written
+ */
+u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type io_type,
+			    unsigned int offset, unsigned int length,
+			    u8 *buf)
+{
+	int i = 0;
+	int io_addr;
+	u8 sum = 0;
+	enum cros_ec_lpc_mec_emi_access_mode access, new_access;
+
+	/*
+	 * Long access cannot be used on misaligned data since reading B0 loads
+	 * the data register and writing B3 flushes.
+	 */
+	if (offset & 0x3 || length < 4)
+		access = ACCESS_TYPE_BYTE;
+	else
+		access = ACCESS_TYPE_LONG_AUTO_INCREMENT;
+
+	mutex_lock(&io_mutex);
+
+	/* Initialize I/O at desired address */
+	cros_ec_lpc_mec_emi_write_address(offset, access);
+
+	/* Skip bytes in case of misaligned offset */
+	io_addr = MEC_EMI_EC_DATA_B0 + (offset & 0x3);
+	while (i < length) {
+		while (io_addr <= MEC_EMI_EC_DATA_B3) {
+			if (io_type == MEC_IO_READ)
+				buf[i] = inb(io_addr++);
+			else
+				outb(buf[i], io_addr++);
+
+			/* Checksum covers the byte just moved, either way */
+			sum += buf[i++];
+			/* Keep the EC address current for a later rewrite */
+			offset++;
+
+			/* Extra bounds check in case of misaligned length */
+			if (i == length)
+				goto done;
+		}
+
+		/*
+		 * Use long auto-increment access except for misaligned write,
+		 * since writing B3 triggers the flush.
+		 */
+		if (length - i < 4 && io_type == MEC_IO_WRITE)
+			new_access = ACCESS_TYPE_BYTE;
+		else
+			new_access = ACCESS_TYPE_LONG_AUTO_INCREMENT;
+
+		/*
+		 * The address is rewritten on a mode change and on every pass
+		 * that was not already in long auto-increment mode.
+		 */
+		if (new_access != access ||
+		    access != ACCESS_TYPE_LONG_AUTO_INCREMENT) {
+			access = new_access;
+			cros_ec_lpc_mec_emi_write_address(offset, access);
+		}
+
+		/* Access [B0, B3] on each loop pass */
+		io_addr = MEC_EMI_EC_DATA_B0;
+	}
+
+done:
+	mutex_unlock(&io_mutex);
+
+	return sum;
+}
+EXPORT_SYMBOL(cros_ec_lpc_io_bytes_mec);
+
+/* Initialise io_mutex; call before the first cros_ec_lpc_io_bytes_mec(). */
+void cros_ec_lpc_mec_init(void)
+{
+	mutex_init(&io_mutex);
+}
+EXPORT_SYMBOL(cros_ec_lpc_mec_init);
+
+/* Destroy io_mutex; counterpart of cros_ec_lpc_mec_init(). */
+void cros_ec_lpc_mec_destroy(void)
+{
+	mutex_destroy(&io_mutex);
+}
+EXPORT_SYMBOL(cros_ec_lpc_mec_destroy);

diff --git a/drivers/platform/chrome/cros_ec_lpc_reg.c b/drivers/platform/chrome/cros_ec_lpc_reg.c
new file mode 100644
index 0000000..dcc7a3e
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_lpc_reg.c

@@ -0,0 +1,133 @@
+/*
+ * cros_ec_lpc_reg - LPC access to the Chrome OS Embedded Controller
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#include <linux/io.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/mfd/cros_ec_lpc_mec.h>
+
+/*
+ * Read @length consecutive I/O ports starting at @offset into @dest, one
+ * byte per port.
+ *
+ * Returns the sum of all bytes read, truncated to 8 bits.
+ */
+static u8 lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest)
+{
+	int checksum = 0;
+	int n = 0;
+
+	while (n < length) {
+		dest[n] = inb(offset + n);
+		checksum += dest[n];
+		n++;
+	}
+
+	return checksum;
+}
+
+/*
+ * Write @length bytes from @msg to consecutive I/O ports starting at
+ * @offset, one byte per port.
+ *
+ * Returns the sum of all bytes written, truncated to 8 bits.
+ */
+static u8 lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg)
+{
+	int checksum = 0;
+	int n = 0;
+
+	while (n < length) {
+		outb(msg[n], offset + n);
+		checksum += msg[n];
+		n++;
+	}
+
+	return checksum;
+}
+
+#ifdef CONFIG_CROS_EC_LPC_MEC
+
+/*
+ * Read @length bytes starting at @offset into @dest. Offsets inside
+ * [MEC_EMI_RANGE_START, MEC_EMI_RANGE_END] go through the MEC EMI
+ * interface, anything else through plain port I/O.
+ *
+ * Returns the 8-bit checksum of the bytes read. A zero-length request, or
+ * one that would cross either edge of the EMI range (which also triggers a
+ * WARN), returns 0 without touching the hardware.
+ */
+u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest)
+{
+	if (!length)
+		return 0;
+
+	if (offset < MEC_EMI_RANGE_START || offset > MEC_EMI_RANGE_END) {
+		/* Plain LPC access must not run into the EMI range */
+		if (WARN_ON(offset < MEC_EMI_RANGE_START &&
+			    offset + length > MEC_EMI_RANGE_START))
+			return 0;
+
+		return lpc_read_bytes(offset, length, dest);
+	}
+
+	/* EMI access must end inside the EMI range */
+	if (WARN_ON(offset + length - 1 > MEC_EMI_RANGE_END))
+		return 0;
+
+	return cros_ec_lpc_io_bytes_mec(MEC_IO_READ, offset, length, dest);
+}
+
+/*
+ * Write @length bytes from @msg starting at @offset. Offsets inside
+ * [MEC_EMI_RANGE_START, MEC_EMI_RANGE_END] go through the MEC EMI
+ * interface, anything else through plain port I/O.
+ *
+ * Returns the 8-bit checksum of the bytes written. A zero-length request,
+ * or one that would cross either edge of the EMI range (which also triggers
+ * a WARN), returns 0 without touching the hardware.
+ */
+u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg)
+{
+	if (!length)
+		return 0;
+
+	if (offset < MEC_EMI_RANGE_START || offset > MEC_EMI_RANGE_END) {
+		/* Plain LPC access must not run into the EMI range */
+		if (WARN_ON(offset < MEC_EMI_RANGE_START &&
+			    offset + length > MEC_EMI_RANGE_START))
+			return 0;
+
+		return lpc_write_bytes(offset, length, msg);
+	}
+
+	/* EMI access must end inside the EMI range */
+	if (WARN_ON(offset + length - 1 > MEC_EMI_RANGE_END))
+		return 0;
+
+	return cros_ec_lpc_io_bytes_mec(MEC_IO_WRITE, offset, length, msg);
+}
+
+/* MEC build: register-access setup is just the MEC helper's init. */
+void cros_ec_lpc_reg_init(void)
+{
+	cros_ec_lpc_mec_init();
+}
+
+/* MEC build: counterpart of cros_ec_lpc_reg_init(). */
+void cros_ec_lpc_reg_destroy(void)
+{
+	cros_ec_lpc_mec_destroy();
+}
+
+#else /* CONFIG_CROS_EC_LPC_MEC */
+
+/*
+ * Non-MEC build: every read is plain port I/O.
+ * Returns the 8-bit checksum of the bytes read.
+ */
+u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest)
+{
+	return lpc_read_bytes(offset, length, dest);
+}
+
+/*
+ * Non-MEC build: every write is plain port I/O.
+ * Returns the 8-bit checksum of the bytes written.
+ */
+u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg)
+{
+	return lpc_write_bytes(offset, length, msg);
+}
+
+/* Non-MEC build: nothing to set up. */
+void cros_ec_lpc_reg_init(void)
+{
+}
+
+/* Non-MEC build: nothing to tear down. */
+void cros_ec_lpc_reg_destroy(void)
+{
+}
+
+#endif /* CONFIG_CROS_EC_LPC_MEC */

diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index ed5dee7..8dfa7fc 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c

@@ -150,6 +150,40 @@ int cros_ec_check_result(struct cros_ec_device *ec_dev,
 }
 EXPORT_SYMBOL(cros_ec_check_result);
 
+/*
+ * cros_ec_get_host_event_wake_mask
+ *
+ * Ask the EC for the mask of host events that cause wake from suspend.
+ *
+ * @ec_dev: EC device to call
+ * @msg: message structure to use; its command, version and sizes are
+ *       overwritten here
+ * @mask: written only when send_command() returns a positive value
+ *
+ * Returns whatever send_command() returned.
+ *
+ * LOCKING:
+ * the caller has ec_dev->lock mutex, or the caller knows there is
+ * no other command in progress.
+ */
+static int cros_ec_get_host_event_wake_mask(struct cros_ec_device *ec_dev,
+					    struct cros_ec_command *msg,
+					    uint32_t *mask)
+{
+	int ret;
+
+	msg->command = EC_CMD_HOST_EVENT_GET_WAKE_MASK;
+	msg->version = 0;
+	msg->outsize = 0;
+	msg->insize = sizeof(struct ec_response_host_event_mask);
+
+	ret = send_command(ec_dev, msg);
+	if (ret <= 0)
+		return ret;
+
+	*mask = ((struct ec_response_host_event_mask *)msg->data)->mask;
+
+	return ret;
+}
+
 static int cros_ec_host_command_proto_query(struct cros_ec_device *ec_dev,
 					    int devidx,
 					    struct cros_ec_command *msg)
@@ -235,6 +269,22 @@ static int cros_ec_host_command_proto_query_v2(struct cros_ec_device *ec_dev)
 	return ret;
 }
 
+/*
+ * cros_ec_get_host_command_version_mask
+ *
+ * Get the version mask of a given command.
+ *
+ * @ec_dev: EC device to call
+ * @cmd: command to get the version of.
+ * @mask: result when function returns 0.
+ *
+ * @return 0 on success, error code otherwise
+ *
+ * LOCKING:
+ * the caller has ec_dev->lock mutex or the caller knows there is
+ * no other command in progress.
+ */
 static int cros_ec_get_host_command_version_mask(struct cros_ec_device *ec_dev,
 	u16 cmd, u32 *mask)
 {
@@ -256,7 +306,7 @@ static int cros_ec_get_host_command_version_mask(struct cros_ec_device *ec_dev,
 	pver = (struct ec_params_get_cmd_versions *)msg->data;
 	pver->cmd = cmd;
 
-	ret = cros_ec_cmd_xfer(ec_dev, msg);
+	ret = send_command(ec_dev, msg);
 	if (ret > 0) {
 		rver = (struct ec_response_get_cmd_versions *)msg->data;
 		*mask = rver->version_mask;
@@ -371,6 +421,17 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev)
 	else
 		ec_dev->mkbp_event_supported = 1;
 
+	/*
+	 * Get host event wake mask, assume all events are wake events
+	 * if unavailable.
+	 */
+	ret = cros_ec_get_host_event_wake_mask(ec_dev, proto_msg,
+					       &ec_dev->host_event_wake_mask);
+	if (ret < 0)
+		ec_dev->host_event_wake_mask = U32_MAX;
+
+	ret = 0;
+
 exit:
 	kfree(proto_msg);
 	return ret;
@@ -486,11 +547,54 @@ static int get_keyboard_state_event(struct cros_ec_device *ec_dev)
 	return ec_dev->event_size;
 }
 
-int cros_ec_get_next_event(struct cros_ec_device *ec_dev)
+/**
+ * cros_ec_get_next_event - Fetch the next available event from the EC
+ * @ec_dev: EC device to query
+ * @wake_event: if non-NULL, set on success to whether this is a wake event
+ *
+ * Return: a negative error code on failure, otherwise the non-negative
+ * result of get_next_event() or get_keyboard_state_event().
+ */
+int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event)
 {
-	if (ec_dev->mkbp_event_supported)
-		return get_next_event(ec_dev);
-	else
-		return get_keyboard_state_event(ec_dev);
+	const bool mkbp = ec_dev->mkbp_event_supported;
+	u32 host_event = 0;
+	int status;
+
+	if (mkbp)
+		status = get_next_event(ec_dev);
+	else
+		status = get_keyboard_state_event(ec_dev);
+
+	/* Nothing more to do on failure or when the caller ignores wake-ups */
+	if (status < 0 || !wake_event)
+		return status;
+
+	/* Keyboard-state events and non-host events always count as wake */
+	if (mkbp)
+		host_event = cros_ec_get_host_event(ec_dev);
+	*wake_event = !host_event ||
+		      (host_event & ec_dev->host_event_wake_mask);
+	return status;
 }
 EXPORT_SYMBOL(cros_ec_get_next_event);
+
+/* Return the host event bits held in ec_dev->event_data, or 0 if none. */
+u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev)
+{
+	/* Misuse by a caller is not worth a crash: warn once, report none */
+	if (WARN_ON_ONCE(!ec_dev->mkbp_event_supported))
+		return 0;
+
+	if (ec_dev->event_data.event_type != EC_MKBP_EVENT_HOST_EVENT)
+		return 0;
+
+	if (ec_dev->event_size != sizeof(u32)) {
+		dev_warn(ec_dev->dev, "Invalid host event size\n");
+		return 0;
+	}
+
+	/* Read as an unaligned little-endian 32-bit value */
+	return get_unaligned_le32(&ec_dev->event_data.data.host_event);
+}
+EXPORT_SYMBOL(cros_ec_get_host_event);

diff --git a/drivers/platform/x86/alienware-wmi.c b/drivers/platform/x86/alienware-wmi.c
index 9866fec..0831b42 100644
--- a/drivers/platform/x86/alienware-wmi.c
+++ b/drivers/platform/x86/alienware-wmi.c

@@ -604,7 +604,7 @@ static struct attribute *hdmi_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group hdmi_attribute_group = {
+static const struct attribute_group hdmi_attribute_group = {
 	.name = "hdmi",
 	.attrs = hdmi_attrs,
 };
@@ -660,7 +660,7 @@ static struct attribute *amplifier_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group amplifier_attribute_group = {
+static const struct attribute_group amplifier_attribute_group = {
 	.name = "amplifier",
 	.attrs = amplifier_attrs,
 };
@@ -741,7 +741,7 @@ static struct attribute *deepsleep_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group deepsleep_attribute_group = {
+static const struct attribute_group deepsleep_attribute_group = {
 	.name = "deepsleep",
 	.attrs = deepsleep_attrs,
 };

diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 6c7d860..709e3a6 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c

@@ -1433,7 +1433,7 @@ static umode_t asus_hwmon_sysfs_is_visible(struct kobject *kobj,
 	return ok ? attr->mode : 0;
 }
 
-static struct attribute_group hwmon_attribute_group = {
+static const struct attribute_group hwmon_attribute_group = {
 	.is_visible = asus_hwmon_sysfs_is_visible,
 	.attrs = hwmon_attributes
 };
@@ -1821,7 +1821,7 @@ static umode_t asus_sysfs_is_visible(struct kobject *kobj,
 	return ok ? attr->mode : 0;
 }
 
-static struct attribute_group platform_attribute_group = {
+static const struct attribute_group platform_attribute_group = {
 	.is_visible = asus_sysfs_is_visible,
 	.attrs = platform_attributes
 };

diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c
index e1c2b6d..a8e4a53 100644
--- a/drivers/platform/x86/compal-laptop.c
+++ b/drivers/platform/x86/compal-laptop.c

@@ -718,7 +718,7 @@ static struct attribute *compal_platform_attrs[] = {
 	&dev_attr_wake_up_mouse.attr,
 	NULL
 };
-static struct attribute_group compal_platform_attr_group = {
+static const struct attribute_group compal_platform_attr_group = {
 	.attrs = compal_platform_attrs
 };
 

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index c1a8528..85de30f 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c

@@ -317,7 +317,7 @@ static struct attribute *fujitsu_pf_attributes[] = {
 	NULL
 };
 
-static struct attribute_group fujitsu_pf_attribute_group = {
+static const struct attribute_group fujitsu_pf_attribute_group = {
 	.attrs = fujitsu_pf_attributes
 };
 
@@ -695,6 +695,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 	if (call_fext_func(device,
 			   FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) {
 		led = devm_kzalloc(&device->dev, sizeof(*led), GFP_KERNEL);
+		if (!led)
+			return -ENOMEM;
+
 		led->name = "fujitsu::logolamp";
 		led->brightness_set_blocking = logolamp_set;
 		led->brightness_get = logolamp_get;
@@ -707,6 +710,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 			    FUNC_LEDS, 0x0, 0x0, 0x0) & KEYBOARD_LAMPS) &&
 	    (call_fext_func(device, FUNC_BUTTONS, 0x0, 0x0, 0x0) == 0x0)) {
 		led = devm_kzalloc(&device->dev, sizeof(*led), GFP_KERNEL);
+		if (!led)
+			return -ENOMEM;
+
 		led->name = "fujitsu::kblamps";
 		led->brightness_set_blocking = kblamps_set;
 		led->brightness_get = kblamps_get;
@@ -723,6 +729,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 	 */
 	if (call_fext_func(device, FUNC_BUTTONS, 0x0, 0x0, 0x0) & BIT(24)) {
 		led = devm_kzalloc(&device->dev, sizeof(*led), GFP_KERNEL);
+		if (!led)
+			return -ENOMEM;
+
 		led->name = "fujitsu::radio_led";
 		led->brightness_set_blocking = radio_led_set;
 		led->brightness_get = radio_led_get;
@@ -741,6 +750,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 	    (call_fext_func(device,
 			    FUNC_LEDS, 0x2, ECO_LED, 0x0) != UNSUPPORTED_CMD)) {
 		led = devm_kzalloc(&device->dev, sizeof(*led), GFP_KERNEL);
+		if (!led)
+			return -ENOMEM;
+
 		led->name = "fujitsu::eco_led";
 		led->brightness_set_blocking = eco_led_set;
 		led->brightness_get = eco_led_get;

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 527e5d9..603fc60 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c

@@ -909,17 +909,94 @@ static const struct dmi_system_id no_hw_rfkill_list[] = {
 		},
 	},
 	{
+		.ident = "Lenovo V310-14IKB",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo V310-14IKB"),
+		},
+	},
+	{
+		.ident = "Lenovo V310-14ISK",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo V310-14ISK"),
+		},
+	},
+	{
+		.ident = "Lenovo V310-15IKB",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo V310-15IKB"),
+		},
+	},
+	{
 		.ident = "Lenovo V310-15ISK",
 		.matches = {
-		        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-		        DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo V310-15ISK"),
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo V310-15ISK"),
+		},
+	},
+	{
+		.ident = "Lenovo V510-15IKB",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo V510-15IKB"),
+		},
+	},
+	{
+		.ident = "Lenovo ideapad 300-15IBR",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 300-15IBR"),
+		},
+	},
+	{
+		.ident = "Lenovo ideapad 300-15IKB",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 300-15IKB"),
+		},
+	},
+	{
+		.ident = "Lenovo ideapad 300S-11IBR",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 300S-11BR"),
+		},
+	},
+	{
+		.ident = "Lenovo ideapad 310-15ABR",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 310-15ABR"),
+		},
+	},
+	{
+		.ident = "Lenovo ideapad 310-15IAP",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 310-15IAP"),
 		},
 	},
 	{
 		.ident = "Lenovo ideapad 310-15IKB",
 		.matches = {
-		        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-		        DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 310-15IKB"),
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 310-15IKB"),
+		},
+	},
+	{
+		.ident = "Lenovo ideapad 310-15ISK",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 310-15ISK"),
+		},
+	},
+	{
+		.ident = "Lenovo ideapad Y700-14ISK",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad Y700-14ISK"),
 		},
 	},
 	{

diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c
index 4cc2f4e..cd21df9 100644
--- a/drivers/platform/x86/intel_telemetry_debugfs.c
+++ b/drivers/platform/x86/intel_telemetry_debugfs.c

@@ -710,6 +710,24 @@ static const struct file_operations telem_socstate_ops = {
 	.release	= single_release,
 };
 
+/* debugfs getter: store the S0ix counter value in *val; @data is unused. */
+static int telem_s0ix_res_get(void *data, u64 *val)
+{
+	u64 s0ix_total_res;
+	int ret;
+
+	ret = intel_pmc_s0ix_counter_read(&s0ix_total_res);
+	if (ret) {
+		pr_err("Failed to read S0ix residency\n");
+		return ret;
+	}
+
+	*val = s0ix_total_res;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(telem_s0ix_fops, telem_s0ix_res_get, NULL, "%llu\n");
+
 static int telem_pss_trc_verb_show(struct seq_file *s, void *unused)
 {
 	u32 verbosity;
@@ -938,7 +956,7 @@ static struct notifier_block pm_notifier = {
 static int __init telemetry_debugfs_init(void)
 {
 	const struct x86_cpu_id *id;
-	int err = -ENOMEM;
+	int err;
 	struct dentry *f;
 
 	/* Only APL supported for now */
@@ -958,11 +976,10 @@ static int __init telemetry_debugfs_init(void)
 
 	register_pm_notifier(&pm_notifier);
 
+	err = -ENOMEM;
 	debugfs_conf->telemetry_dbg_dir = debugfs_create_dir("telemetry", NULL);
-	if (!debugfs_conf->telemetry_dbg_dir) {
-		err = -ENOMEM;
+	if (!debugfs_conf->telemetry_dbg_dir)
 		goto out_pm;
-	}
 
 	f = debugfs_create_file("pss_info", S_IFREG | S_IRUGO,
 				debugfs_conf->telemetry_dbg_dir, NULL,
@@ -988,6 +1005,14 @@ static int __init telemetry_debugfs_init(void)
 		goto out;
 	}
 
+	f = debugfs_create_file("s0ix_residency_usec", S_IFREG | S_IRUGO,
+				debugfs_conf->telemetry_dbg_dir,
+				NULL, &telem_s0ix_fops);
+	if (!f) {
+		pr_err("s0ix_residency_usec debugfs register failed\n");
+		goto out;
+	}
+
 	f = debugfs_create_file("pss_trace_verbosity", S_IFREG | S_IRUGO,
 				debugfs_conf->telemetry_dbg_dir, NULL,
 				&telem_pss_trc_verb_ops);

diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c
index 76b0a58..5c39b32 100644
--- a/drivers/platform/x86/panasonic-laptop.c
+++ b/drivers/platform/x86/panasonic-laptop.c

@@ -437,7 +437,7 @@ static struct attribute *pcc_sysfs_entries[] = {
 	NULL,
 };
 
-static struct attribute_group pcc_attr_group = {
+static const struct attribute_group pcc_attr_group = {
 	.name	= NULL,		/* put in device directory */
 	.attrs	= pcc_sysfs_entries,
 };

diff --git a/drivers/platform/x86/peaq-wmi.c b/drivers/platform/x86/peaq-wmi.c
index ca75b4dc..77d1f90 100644
--- a/drivers/platform/x86/peaq-wmi.c
+++ b/drivers/platform/x86/peaq-wmi.c

@@ -51,7 +51,7 @@ static void peaq_wmi_poll(struct input_polled_dev *dev)
 		return;
 	}
 
-	if (peaq_ignore_events_counter && --peaq_ignore_events_counter > 0)
+	if (peaq_ignore_events_counter && --peaq_ignore_events_counter >= 0)
 		return;
 
 	if (obj.integer.value) {

diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c
index 5c4dfe4..0c703fe 100644
--- a/drivers/platform/x86/samsung-laptop.c
+++ b/drivers/platform/x86/samsung-laptop.c

@@ -1232,7 +1232,7 @@ static umode_t samsung_sysfs_is_visible(struct kobject *kobj,
 	return ok ? attr->mode : 0;
 }
 
-static struct attribute_group platform_attribute_group = {
+static const struct attribute_group platform_attribute_group = {
 	.is_visible = samsung_sysfs_is_visible,
 	.attrs = platform_attributes
 };

diff --git a/drivers/platform/x86/silead_dmi.c b/drivers/platform/x86/silead_dmi.c
index 3cd3bdf..1157a7b 100644
--- a/drivers/platform/x86/silead_dmi.c
+++ b/drivers/platform/x86/silead_dmi.c

@@ -122,6 +122,20 @@ static const struct silead_ts_dmi_data pov_mobii_wintab_p800w_data = {
 	.properties	= pov_mobii_wintab_p800w_props,
 };
 
+static const struct property_entry itworks_tw891_props[] = {
+	PROPERTY_ENTRY_U32("touchscreen-size-x", 1600),
+	PROPERTY_ENTRY_U32("touchscreen-size-y", 890),
+	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
+	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
+	PROPERTY_ENTRY_STRING("firmware-name", "gsl3670-itworks-tw891.fw"),
+	{ }
+};
+
+static const struct silead_ts_dmi_data itworks_tw891_data = {
+	.acpi_name	= "MSSL1680:00",
+	.properties	= itworks_tw891_props,
+};
+
 static const struct dmi_system_id silead_ts_dmi_table[] = {
 	{
 		/* CUBE iwork8 Air */
@@ -160,6 +174,16 @@ static const struct dmi_system_id silead_ts_dmi_table[] = {
 		},
 	},
 	{
+		/* Ployer Momo7w (same hardware as the Trekstor ST70416-6) */
+		.driver_data = (void *)&surftab_wintron70_st70416_6_data,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Shenzhen PLOYER"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "MOMO7W"),
+			/* Exact match, different versions need different fw */
+			DMI_MATCH(DMI_BIOS_VERSION, "MOMO.G.WI71C.MABMRBA02"),
+		},
+	},
+	{
 		/* GP-electronic T701 */
 		.driver_data = (void *)&gp_electronic_t701_data,
 		.matches = {
@@ -187,6 +211,14 @@ static const struct dmi_system_id silead_ts_dmi_table[] = {
 			DMI_MATCH(DMI_BIOS_DATE, "08/22/2014"),
 		},
 	},
+	{
+		/* I.T.Works TW891 */
+		.driver_data = (void *)&itworks_tw891_data,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "TW891"),
+		},
+	},
 	{ },
 };
 

diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 88f9f79..bb1dcd7 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c

@@ -2419,7 +2419,7 @@ static umode_t toshiba_sysfs_is_visible(struct kobject *kobj,
 	return exists ? attr->mode : 0;
 }
 
-static struct attribute_group toshiba_attr_group = {
+static const struct attribute_group toshiba_attr_group = {
 	.is_visible = toshiba_sysfs_is_visible,
 	.attrs = toshiba_attributes,
 };

diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c
index 3de802f..9dff1b4 100644
--- a/drivers/power/supply/twl4030_charger.c
+++ b/drivers/power/supply/twl4030_charger.c

@@ -980,10 +980,37 @@ static int twl4030_bci_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, bci);
 
+	INIT_WORK(&bci->work, twl4030_bci_usb_work);
+	INIT_DELAYED_WORK(&bci->current_worker, twl4030_current_worker);
+
 	bci->channel_vac = devm_iio_channel_get(&pdev->dev, "vac");
 	if (IS_ERR(bci->channel_vac)) {
+		ret = PTR_ERR(bci->channel_vac);
+		if (ret == -EPROBE_DEFER)
+			return ret;	/* iio not ready */
+		dev_warn(&pdev->dev, "could not request vac iio channel (%d)",
+			ret);
 		bci->channel_vac = NULL;
-		dev_warn(&pdev->dev, "could not request vac iio channel");
+	}
+
+	if (bci->dev->of_node) {
+		struct device_node *phynode;
+
+		phynode = of_find_compatible_node(bci->dev->of_node->parent,
+						  NULL, "ti,twl4030-usb");
+		if (phynode) {
+			bci->usb_nb.notifier_call = twl4030_bci_usb_ncb;
+			bci->transceiver = devm_usb_get_phy_by_node(
+				bci->dev, phynode, &bci->usb_nb);
+			if (IS_ERR(bci->transceiver)) {
+				ret = PTR_ERR(bci->transceiver);
+				if (ret == -EPROBE_DEFER)
+					return ret;	/* phy not ready */
+				dev_warn(&pdev->dev, "could not request transceiver (%d)",
+					ret);
+				bci->transceiver = NULL;
+			}
+		}
 	}
 
 	bci->ac = devm_power_supply_register(&pdev->dev, &twl4030_bci_ac_desc,
@@ -1019,20 +1046,6 @@ static int twl4030_bci_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	INIT_WORK(&bci->work, twl4030_bci_usb_work);
-	INIT_DELAYED_WORK(&bci->current_worker, twl4030_current_worker);
-
-	bci->usb_nb.notifier_call = twl4030_bci_usb_ncb;
-	if (bci->dev->of_node) {
-		struct device_node *phynode;
-
-		phynode = of_find_compatible_node(bci->dev->of_node->parent,
-						  NULL, "ti,twl4030-usb");
-		if (phynode)
-			bci->transceiver = devm_usb_get_phy_by_node(
-				bci->dev, phynode, &bci->usb_nb);
-	}
-
 	/* Enable interrupts now. */
 	reg = ~(u32)(TWL4030_ICHGLOW | TWL4030_ICHGEOC | TWL4030_TBATOR2 |
 		TWL4030_TBATOR1 | TWL4030_BATSTS);

diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index a0860b3..1581f6a 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c

@@ -678,7 +678,9 @@ struct pwm_device *of_pwm_get(struct device_node *np, const char *con_id)
 
 	pc = of_node_to_pwmchip(args.np);
 	if (IS_ERR(pc)) {
-		pr_err("%s(): PWM chip not found\n", __func__);
+		if (PTR_ERR(pc) != -EPROBE_DEFER)
+			pr_err("%s(): PWM chip not found\n", __func__);
+
 		pwm = ERR_CAST(pc);
 		goto put;
 	}

diff --git a/drivers/pwm/pwm-bfin.c b/drivers/pwm/pwm-bfin.c
index d2ed0a2..a9a8813 100644
--- a/drivers/pwm/pwm-bfin.c
+++ b/drivers/pwm/pwm-bfin.c

@@ -118,10 +118,8 @@ static int bfin_pwm_probe(struct platform_device *pdev)
 	int ret;
 
 	pwm = devm_kzalloc(&pdev->dev, sizeof(*pwm), GFP_KERNEL);
-	if (!pwm) {
-		dev_err(&pdev->dev, "failed to allocate memory\n");
+	if (!pwm)
 		return -ENOMEM;
-	}
 
 	platform_set_drvdata(pdev, pwm);
 

diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c
index f6ca4e8..9c13694 100644
--- a/drivers/pwm/pwm-cros-ec.c
+++ b/drivers/pwm/pwm-cros-ec.c

@@ -75,8 +75,8 @@ static int __cros_ec_pwm_get_duty(struct cros_ec_device *ec, u8 index,
 
 	msg->version = 0;
 	msg->command = EC_CMD_PWM_GET_DUTY;
-	msg->insize = sizeof(*params);
-	msg->outsize = sizeof(*resp);
+	msg->insize = sizeof(*resp);
+	msg->outsize = sizeof(*params);
 
 	params->pwm_type = EC_PWM_TYPE_GENERIC;
 	params->index = index;

diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c
index d0e8f85..8dadc58 100644
--- a/drivers/pwm/pwm-hibvt.c
+++ b/drivers/pwm/pwm-hibvt.c

@@ -165,7 +165,7 @@ static int hibvt_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 	return 0;
 }
 
-static struct pwm_ops hibvt_pwm_ops = {
+static const struct pwm_ops hibvt_pwm_ops = {
 	.get_state = hibvt_pwm_get_state,
 	.apply = hibvt_pwm_apply,
 

diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c
index 045ef9f..cb845ed 100644
--- a/drivers/pwm/pwm-meson.c
+++ b/drivers/pwm/pwm-meson.c

@@ -103,6 +103,7 @@ struct meson_pwm_channel {
 
 struct meson_pwm_data {
 	const char * const *parent_names;
+	unsigned int num_parents;
 };
 
 struct meson_pwm {
@@ -162,7 +163,8 @@ static int meson_pwm_calc(struct meson_pwm *meson,
 			  unsigned int duty, unsigned int period)
 {
 	unsigned int pre_div, cnt, duty_cnt;
-	unsigned long fin_freq = -1, fin_ns;
+	unsigned long fin_freq = -1;
+	u64 fin_ps;
 
 	if (~(meson->inverter_mask >> id) & 0x1)
 		duty = period - duty;
@@ -178,13 +180,15 @@ static int meson_pwm_calc(struct meson_pwm *meson,
 	}
 
 	dev_dbg(meson->chip.dev, "fin_freq: %lu Hz\n", fin_freq);
-	fin_ns = NSEC_PER_SEC / fin_freq;
+	fin_ps = (u64)NSEC_PER_SEC * 1000;
+	do_div(fin_ps, fin_freq);
 
 	/* Calc pre_div with the period */
 	for (pre_div = 0; pre_div < MISC_CLK_DIV_MASK; pre_div++) {
-		cnt = DIV_ROUND_CLOSEST(period, fin_ns * (pre_div + 1));
-		dev_dbg(meson->chip.dev, "fin_ns=%lu pre_div=%u cnt=%u\n",
-			fin_ns, pre_div, cnt);
+		cnt = DIV_ROUND_CLOSEST_ULL((u64)period * 1000,
+					    fin_ps * (pre_div + 1));
+		dev_dbg(meson->chip.dev, "fin_ps=%llu pre_div=%u cnt=%u\n",
+			fin_ps, pre_div, cnt);
 		if (cnt <= 0xffff)
 			break;
 	}
@@ -207,7 +211,8 @@ static int meson_pwm_calc(struct meson_pwm *meson,
 		channel->lo = cnt;
 	} else {
 		/* Then check is we can have the duty with the same pre_div */
-		duty_cnt = DIV_ROUND_CLOSEST(duty, fin_ns * (pre_div + 1));
+		duty_cnt = DIV_ROUND_CLOSEST_ULL((u64)duty * 1000,
+						 fin_ps * (pre_div + 1));
 		if (duty_cnt > 0xffff) {
 			dev_err(meson->chip.dev, "unable to get duty cycle\n");
 			return -EINVAL;
@@ -381,6 +386,7 @@ static const char * const pwm_meson8b_parent_names[] = {
 
 static const struct meson_pwm_data pwm_meson8b_data = {
 	.parent_names = pwm_meson8b_parent_names,
+	.num_parents = ARRAY_SIZE(pwm_meson8b_parent_names),
 };
 
 static const char * const pwm_gxbb_parent_names[] = {
@@ -389,11 +395,35 @@ static const char * const pwm_gxbb_parent_names[] = {
 
 static const struct meson_pwm_data pwm_gxbb_data = {
 	.parent_names = pwm_gxbb_parent_names,
+	.num_parents = ARRAY_SIZE(pwm_gxbb_parent_names),
+};
+
+/*
+ * Only the 2 first inputs of the GXBB AO PWMs are valid
+ * The last 2 are grounded
+ */
+static const char * const pwm_gxbb_ao_parent_names[] = {
+	"xtal", "clk81"
+};
+
+static const struct meson_pwm_data pwm_gxbb_ao_data = {
+	.parent_names = pwm_gxbb_ao_parent_names,
+	.num_parents = ARRAY_SIZE(pwm_gxbb_ao_parent_names),
 };
 
 static const struct of_device_id meson_pwm_matches[] = {
-	{ .compatible = "amlogic,meson8b-pwm", .data = &pwm_meson8b_data },
-	{ .compatible = "amlogic,meson-gxbb-pwm", .data = &pwm_gxbb_data },
+	{
+		.compatible = "amlogic,meson8b-pwm",
+		.data = &pwm_meson8b_data
+	},
+	{
+		.compatible = "amlogic,meson-gxbb-pwm",
+		.data = &pwm_gxbb_data
+	},
+	{
+		.compatible = "amlogic,meson-gxbb-ao-pwm",
+		.data = &pwm_gxbb_ao_data
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, meson_pwm_matches);
@@ -417,7 +447,7 @@ static int meson_pwm_init_channels(struct meson_pwm *meson,
 		init.ops = &clk_mux_ops;
 		init.flags = CLK_IS_BASIC;
 		init.parent_names = meson->data->parent_names;
-		init.num_parents = 1 << MISC_CLK_SEL_WIDTH;
+		init.num_parents = meson->data->num_parents;
 
 		channel->mux.reg = meson->base + REG_MISC_AB;
 		channel->mux.shift = mux_reg_shifts[i];

diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c
index 1284ffa..6d23f1d 100644
--- a/drivers/pwm/pwm-sun4i.c
+++ b/drivers/pwm/pwm-sun4i.c

@@ -8,8 +8,10 @@
 
 #include <linux/bitops.h>
 #include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/io.h>
+#include <linux/jiffies.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -44,6 +46,10 @@
 
 #define PWM_DTY_MASK		GENMASK(15, 0)
 
+#define PWM_REG_PRD(reg)	((((reg) >> 16) & PWM_PRD_MASK) + 1)
+#define PWM_REG_DTY(reg)	((reg) & PWM_DTY_MASK)
+#define PWM_REG_PRESCAL(reg, chan)	(((reg) >> ((chan) * PWMCH_OFFSET)) & PWM_PRESCAL_MASK)
+
 #define BIT_CH(bit, chan)	((bit) << ((chan) * PWMCH_OFFSET))
 
 static const u32 prescaler_table[] = {
@@ -77,6 +83,8 @@ struct sun4i_pwm_chip {
 	void __iomem *base;
 	spinlock_t ctrl_lock;
 	const struct sun4i_pwm_data *data;
+	unsigned long next_period[2];
+	bool needs_delay[2];
 };
 
 static inline struct sun4i_pwm_chip *to_sun4i_pwm_chip(struct pwm_chip *chip)
@@ -96,26 +104,65 @@ static inline void sun4i_pwm_writel(struct sun4i_pwm_chip *chip,
 	writel(val, chip->base + offset);
 }
 
-static int sun4i_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
-			    int duty_ns, int period_ns)
+/* Read back polarity, enable state, period and duty of @pwm into @state. */
+static void sun4i_pwm_get_state(struct pwm_chip *chip,
+				struct pwm_device *pwm,
+				struct pwm_state *state)
 {
 	struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip);
-	u32 prd, dty, val, clk_gate;
+	u64 clk_rate, tmp;
+	u32 val;
+	unsigned int prescaler;
+
+	clk_rate = clk_get_rate(sun4i_pwm->clk);
+	val = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
+
+	/* Compare only this channel's prescaler field, not the whole register */
+	if ((PWM_REG_PRESCAL(val, pwm->hwpwm) == PWM_PRESCAL_MASK) &&
+	    sun4i_pwm->data->has_prescaler_bypass)
+		prescaler = 1;
+	else
+		prescaler = prescaler_table[PWM_REG_PRESCAL(val, pwm->hwpwm)];
+
+	if (prescaler == 0)
+		return;	/* zero divider: leave @state untouched */
+
+	if (val & BIT_CH(PWM_ACT_STATE, pwm->hwpwm))
+		state->polarity = PWM_POLARITY_NORMAL;
+	else
+		state->polarity = PWM_POLARITY_INVERSED;
+
+	state->enabled = !!(val & BIT_CH(PWM_CLK_GATING | PWM_EN, pwm->hwpwm));
+
+	val = sun4i_pwm_readl(sun4i_pwm, PWM_CH_PRD(pwm->hwpwm));
+
+	/* Widen before multiplying: the product overflows 32-bit arithmetic */
+	tmp = (u64)prescaler * NSEC_PER_SEC * PWM_REG_DTY(val);
+	state->duty_cycle = DIV_ROUND_CLOSEST_ULL(tmp, clk_rate);
+
+	tmp = (u64)prescaler * NSEC_PER_SEC * PWM_REG_PRD(val);
+	state->period = DIV_ROUND_CLOSEST_ULL(tmp, clk_rate);
+}
+
+static int sun4i_pwm_calculate(struct sun4i_pwm_chip *sun4i_pwm,
+			       struct pwm_state *state,
+			       u32 *dty, u32 *prd, unsigned int *prsclr)
+{
 	u64 clk_rate, div = 0;
-	unsigned int prescaler = 0;
-	int err;
+	unsigned int pval, prescaler = 0;
 
 	clk_rate = clk_get_rate(sun4i_pwm->clk);
 
 	if (sun4i_pwm->data->has_prescaler_bypass) {
 		/* First, test without any prescaler when available */
 		prescaler = PWM_PRESCAL_MASK;
+		pval = 1;
 		/*
 		 * When not using any prescaler, the clock period in nanoseconds
 		 * is not an integer so round it half up instead of
 		 * truncating to get less surprising values.
 		 */
-		div = clk_rate * period_ns + NSEC_PER_SEC / 2;
+		div = clk_rate * state->period + NSEC_PER_SEC / 2;
 		do_div(div, NSEC_PER_SEC);
 		if (div - 1 > PWM_PRD_MASK)
 			prescaler = 0;
@@ -126,137 +173,141 @@ static int sun4i_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 		for (prescaler = 0; prescaler < PWM_PRESCAL_MASK; prescaler++) {
 			if (!prescaler_table[prescaler])
 				continue;
+			pval = prescaler_table[prescaler];
 			div = clk_rate;
-			do_div(div, prescaler_table[prescaler]);
-			div = div * period_ns;
+			do_div(div, pval);
+			div = div * state->period;
 			do_div(div, NSEC_PER_SEC);
 			if (div - 1 <= PWM_PRD_MASK)
 				break;
 		}
 
-		if (div - 1 > PWM_PRD_MASK) {
-			dev_err(chip->dev, "period exceeds the maximum value\n");
+		if (div - 1 > PWM_PRD_MASK)
 			return -EINVAL;
+	}
+
+	*prd = div;
+	div *= state->duty_cycle;
+	do_div(div, state->period);
+	*dty = div;
+	*prsclr = prescaler;
+
+	div = (u64)pval * NSEC_PER_SEC * *prd;
+	state->period = DIV_ROUND_CLOSEST_ULL(div, clk_rate);
+
+	div = (u64)pval * NSEC_PER_SEC * *dty;
+	state->duty_cycle = DIV_ROUND_CLOSEST_ULL(div, clk_rate);
+
+	return 0;
+}
+
+static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+			   struct pwm_state *state)
+{
+	struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip);
+	struct pwm_state cstate;
+	u32 ctrl;
+	int ret;
+	unsigned int delay_us;
+	unsigned long now;
+
+	pwm_get_state(pwm, &cstate);
+
+	if (!cstate.enabled) {
+		ret = clk_prepare_enable(sun4i_pwm->clk);
+		if (ret) {
+			dev_err(chip->dev, "failed to enable PWM clock\n");
+			return ret;
 		}
 	}
 
-	prd = div;
-	div *= duty_ns;
-	do_div(div, period_ns);
-	dty = div;
-
-	err = clk_prepare_enable(sun4i_pwm->clk);
-	if (err) {
-		dev_err(chip->dev, "failed to enable PWM clock\n");
-		return err;
-	}
-
 	spin_lock(&sun4i_pwm->ctrl_lock);
-	val = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
+	ctrl = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
 
-	if (sun4i_pwm->data->has_rdy && (val & PWM_RDY(pwm->hwpwm))) {
-		spin_unlock(&sun4i_pwm->ctrl_lock);
-		clk_disable_unprepare(sun4i_pwm->clk);
-		return -EBUSY;
+	if ((cstate.period != state->period) ||
+	    (cstate.duty_cycle != state->duty_cycle)) {
+		u32 period, duty, val;
+		unsigned int prescaler;
+
+		ret = sun4i_pwm_calculate(sun4i_pwm, state,
+					  &duty, &period, &prescaler);
+		if (ret) {
+			dev_err(chip->dev, "period exceeds the maximum value\n");
+			spin_unlock(&sun4i_pwm->ctrl_lock);
+			if (!cstate.enabled)
+				clk_disable_unprepare(sun4i_pwm->clk);
+			return ret;
+		}
+
+		if (PWM_REG_PRESCAL(ctrl, pwm->hwpwm) != prescaler) {
+			/* Prescaler changed, the clock has to be gated */
+			ctrl &= ~BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
+			sun4i_pwm_writel(sun4i_pwm, ctrl, PWM_CTRL_REG);
+
+			ctrl &= ~BIT_CH(PWM_PRESCAL_MASK, pwm->hwpwm);
+			ctrl |= BIT_CH(prescaler, pwm->hwpwm);
+		}
+
+		val = (duty & PWM_DTY_MASK) | PWM_PRD(period);
+		sun4i_pwm_writel(sun4i_pwm, val, PWM_CH_PRD(pwm->hwpwm));
+		sun4i_pwm->next_period[pwm->hwpwm] = jiffies +
+			usecs_to_jiffies(cstate.period / 1000 + 1);
+		sun4i_pwm->needs_delay[pwm->hwpwm] = true;
 	}
 
-	clk_gate = val & BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
-	if (clk_gate) {
-		val &= ~BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
-		sun4i_pwm_writel(sun4i_pwm, val, PWM_CTRL_REG);
-	}
-
-	val = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
-	val &= ~BIT_CH(PWM_PRESCAL_MASK, pwm->hwpwm);
-	val |= BIT_CH(prescaler, pwm->hwpwm);
-	sun4i_pwm_writel(sun4i_pwm, val, PWM_CTRL_REG);
-
-	val = (dty & PWM_DTY_MASK) | PWM_PRD(prd);
-	sun4i_pwm_writel(sun4i_pwm, val, PWM_CH_PRD(pwm->hwpwm));
-
-	if (clk_gate) {
-		val = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
-		val |= clk_gate;
-		sun4i_pwm_writel(sun4i_pwm, val, PWM_CTRL_REG);
-	}
-
-	spin_unlock(&sun4i_pwm->ctrl_lock);
-	clk_disable_unprepare(sun4i_pwm->clk);
-
-	return 0;
-}
-
-static int sun4i_pwm_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm,
-				  enum pwm_polarity polarity)
-{
-	struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip);
-	u32 val;
-	int ret;
-
-	ret = clk_prepare_enable(sun4i_pwm->clk);
-	if (ret) {
-		dev_err(chip->dev, "failed to enable PWM clock\n");
-		return ret;
-	}
-
-	spin_lock(&sun4i_pwm->ctrl_lock);
-	val = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
-
-	if (polarity != PWM_POLARITY_NORMAL)
-		val &= ~BIT_CH(PWM_ACT_STATE, pwm->hwpwm);
+	if (state->polarity != PWM_POLARITY_NORMAL)
+		ctrl &= ~BIT_CH(PWM_ACT_STATE, pwm->hwpwm);
 	else
-		val |= BIT_CH(PWM_ACT_STATE, pwm->hwpwm);
+		ctrl |= BIT_CH(PWM_ACT_STATE, pwm->hwpwm);
 
-	sun4i_pwm_writel(sun4i_pwm, val, PWM_CTRL_REG);
-
-	spin_unlock(&sun4i_pwm->ctrl_lock);
-	clk_disable_unprepare(sun4i_pwm->clk);
-
-	return 0;
-}
-
-static int sun4i_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
-{
-	struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip);
-	u32 val;
-	int ret;
-
-	ret = clk_prepare_enable(sun4i_pwm->clk);
-	if (ret) {
-		dev_err(chip->dev, "failed to enable PWM clock\n");
-		return ret;
+	ctrl |= BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
+	if (state->enabled) {
+		ctrl |= BIT_CH(PWM_EN, pwm->hwpwm);
+	} else if (!sun4i_pwm->needs_delay[pwm->hwpwm]) {
+		ctrl &= ~BIT_CH(PWM_EN, pwm->hwpwm);
+		ctrl &= ~BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
 	}
 
-	spin_lock(&sun4i_pwm->ctrl_lock);
-	val = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
-	val |= BIT_CH(PWM_EN, pwm->hwpwm);
-	val |= BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
-	sun4i_pwm_writel(sun4i_pwm, val, PWM_CTRL_REG);
+	sun4i_pwm_writel(sun4i_pwm, ctrl, PWM_CTRL_REG);
+
 	spin_unlock(&sun4i_pwm->ctrl_lock);
 
-	return 0;
-}
+	if (state->enabled)
+		return 0;
 
-static void sun4i_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
-{
-	struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip);
-	u32 val;
+	if (!sun4i_pwm->needs_delay[pwm->hwpwm]) {
+		clk_disable_unprepare(sun4i_pwm->clk);
+		return 0;
+	}
+
+	/* We need a full period to elapse before disabling the channel. */
+	now = jiffies;
+	if (sun4i_pwm->needs_delay[pwm->hwpwm] &&
+	    time_before(now, sun4i_pwm->next_period[pwm->hwpwm])) {
+		delay_us = jiffies_to_usecs(sun4i_pwm->next_period[pwm->hwpwm] -
+					   now);
+		if ((delay_us / 500) > MAX_UDELAY_MS)
+			msleep(delay_us / 1000 + 1);
+		else
+			usleep_range(delay_us, delay_us * 2);
+	}
+	sun4i_pwm->needs_delay[pwm->hwpwm] = false;
 
 	spin_lock(&sun4i_pwm->ctrl_lock);
-	val = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
-	val &= ~BIT_CH(PWM_EN, pwm->hwpwm);
-	val &= ~BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
-	sun4i_pwm_writel(sun4i_pwm, val, PWM_CTRL_REG);
+	ctrl = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
+	ctrl &= ~BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
+	ctrl &= ~BIT_CH(PWM_EN, pwm->hwpwm);
+	sun4i_pwm_writel(sun4i_pwm, ctrl, PWM_CTRL_REG);
 	spin_unlock(&sun4i_pwm->ctrl_lock);
 
 	clk_disable_unprepare(sun4i_pwm->clk);
+
+	return 0;
 }
 
 static const struct pwm_ops sun4i_pwm_ops = {
-	.config = sun4i_pwm_config,
-	.set_polarity = sun4i_pwm_set_polarity,
-	.enable = sun4i_pwm_enable,
-	.disable = sun4i_pwm_disable,
+	.apply = sun4i_pwm_apply,
+	.get_state = sun4i_pwm_get_state,
 	.owner = THIS_MODULE,
 };
 
@@ -316,8 +367,7 @@ static int sun4i_pwm_probe(struct platform_device *pdev)
 {
 	struct sun4i_pwm_chip *pwm;
 	struct resource *res;
-	u32 val;
-	int i, ret;
+	int ret;
 	const struct of_device_id *match;
 
 	match = of_match_device(sun4i_pwm_dt_ids, &pdev->dev);
@@ -353,24 +403,7 @@ static int sun4i_pwm_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, pwm);
 
-	ret = clk_prepare_enable(pwm->clk);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to enable PWM clock\n");
-		goto clk_error;
-	}
-
-	val = sun4i_pwm_readl(pwm, PWM_CTRL_REG);
-	for (i = 0; i < pwm->chip.npwm; i++)
-		if (!(val & BIT_CH(PWM_ACT_STATE, i)))
-			pwm_set_polarity(&pwm->chip.pwms[i],
-					 PWM_POLARITY_INVERSED);
-	clk_disable_unprepare(pwm->clk);
-
 	return 0;
-
-clk_error:
-	pwmchip_remove(&pwm->chip);
-	return ret;
 }
 
 static int sun4i_pwm_remove(struct platform_device *pdev)

diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c
index 8c6ed55..e9b33f0 100644
--- a/drivers/pwm/pwm-tegra.c
+++ b/drivers/pwm/pwm-tegra.c

@@ -41,6 +41,9 @@
 
 struct tegra_pwm_soc {
 	unsigned int num_channels;
+
+	/* Maximum IP frequency for given SoCs */
+	unsigned long max_frequency;
 };
 
 struct tegra_pwm_chip {
@@ -201,7 +204,18 @@ static int tegra_pwm_probe(struct platform_device *pdev)
 	if (IS_ERR(pwm->clk))
 		return PTR_ERR(pwm->clk);
 
-	/* Read PWM clock rate from source */
+	/* Set maximum frequency of the IP */
+	ret = clk_set_rate(pwm->clk, pwm->soc->max_frequency);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to set max frequency: %d\n", ret);
+		return ret;
+	}
+
+	/*
+	 * The requested and configured frequency may differ due to
+	 * clock register resolutions. Get the configured frequency
+	 * so that PWM period can be calculated more accurately.
+	 */
 	pwm->clk_rate = clk_get_rate(pwm->clk);
 
 	pwm->rst = devm_reset_control_get(&pdev->dev, "pwm");
@@ -273,10 +287,12 @@ static int tegra_pwm_resume(struct device *dev)
 
 static const struct tegra_pwm_soc tegra20_pwm_soc = {
 	.num_channels = 4,
+	.max_frequency = 48000000UL,
 };
 
 static const struct tegra_pwm_soc tegra186_pwm_soc = {
 	.num_channels = 1,
+	.max_frequency = 102000000UL,
 };
 
 static const struct of_device_id tegra_pwm_of_match[] = {

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 8d3b957..72419ac 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig

@@ -77,6 +77,14 @@
 	  Say yes here to enable debugging support in the RTC framework
 	  and individual RTC drivers.
 
+config RTC_NVMEM
+	bool "RTC non volatile storage support"
+	select NVMEM
+	default RTC_CLASS
+	help
+	  Say yes here to add support for the non-volatile (often battery
+	  backed) storage present on RTCs.
+
 comment "RTC interfaces"
 
 config RTC_INTF_SYSFS
@@ -197,6 +205,17 @@
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-ac100.
 
+config RTC_DRV_BRCMSTB
+	tristate "Broadcom STB wake-timer"
+	depends on ARCH_BRCMSTB || BMIPS_GENERIC || COMPILE_TEST
+	default ARCH_BRCMSTB || BMIPS_GENERIC
+	help
+	  If you say yes here you get support for the wake-timer found on
+	  Broadcom STB SoCs (BCM7xxx).
+
+	  This driver can also be built as a module. If so, the module will
+	  be called rtc-brcmstb-waketimer.
+
 config RTC_DRV_AS3722
 	tristate "ams AS3722 RTC driver"
 	depends on MFD_AS3722
@@ -791,6 +810,14 @@
 	  This driver can also be built as a module.  If so, the module
 	  will be called rtc-ds3232.
 
+config RTC_DRV_DS3232_HWMON
+	bool "HWMON support for Dallas/Maxim DS3232/DS3234"
+	depends on RTC_DRV_DS3232 && HWMON && !(RTC_DRV_DS3232=y && HWMON=m)
+	default y
+	help
+	  Say Y here if you want to expose temperature sensor data on
+	  rtc-ds3232.
+
 config RTC_DRV_PCF2127
 	tristate "NXP PCF2127"
 	depends on RTC_I2C_AND_SPI
@@ -1484,16 +1511,16 @@
 	  This driver can also be built as a module. If so, the module
 	  will be called armada38x-rtc.
 
-config RTC_DRV_GEMINI
-	tristate "Gemini SoC RTC"
-	depends on ARCH_GEMINI || COMPILE_TEST
+config RTC_DRV_FTRTC010
+	tristate "Faraday Technology FTRTC010 RTC"
 	depends on HAS_IOMEM
+	default ARCH_GEMINI
 	help
 	  If you say Y here you will get support for the
-	  RTC found on Gemini SoC's.
+	  Faraday Technology FTRTC010 found on e.g. Gemini SoCs.
 
 	  This driver can also be built as a module. If so, the module
-	  will be called rtc-gemini.
+	  will be called rtc-ftrtc010.
 
 config RTC_DRV_PS3
 	tristate "PS3 RTC"

diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 13857d2..acd366b 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile

@@ -15,6 +15,7 @@
 rtc-core-y			+= rtc-efi-platform.o
 endif
 
+rtc-core-$(CONFIG_RTC_NVMEM)		+= nvmem.o
 rtc-core-$(CONFIG_RTC_INTF_DEV)		+= rtc-dev.o
 rtc-core-$(CONFIG_RTC_INTF_PROC)	+= rtc-proc.o
 rtc-core-$(CONFIG_RTC_INTF_SYSFS)	+= rtc-sysfs.o
@@ -36,6 +37,7 @@
 obj-$(CONFIG_RTC_DRV_AT91SAM9)	+= rtc-at91sam9.o
 obj-$(CONFIG_RTC_DRV_AU1XXX)	+= rtc-au1xxx.o
 obj-$(CONFIG_RTC_DRV_BFIN)	+= rtc-bfin.o
+obj-$(CONFIG_RTC_DRV_BRCMSTB)	+= rtc-brcmstb-waketimer.o
 obj-$(CONFIG_RTC_DRV_BQ32K)	+= rtc-bq32k.o
 obj-$(CONFIG_RTC_DRV_BQ4802)	+= rtc-bq4802.o
 obj-$(CONFIG_RTC_DRV_CMOS)	+= rtc-cmos.o
@@ -67,7 +69,7 @@
 obj-$(CONFIG_RTC_DRV_EM3027)	+= rtc-em3027.o
 obj-$(CONFIG_RTC_DRV_EP93XX)	+= rtc-ep93xx.o
 obj-$(CONFIG_RTC_DRV_FM3130)	+= rtc-fm3130.o
-obj-$(CONFIG_RTC_DRV_GEMINI)	+= rtc-gemini.o
+obj-$(CONFIG_RTC_DRV_FTRTC010)	+= rtc-ftrtc010.o
 obj-$(CONFIG_RTC_DRV_GENERIC)	+= rtc-generic.o
 obj-$(CONFIG_RTC_DRV_HID_SENSOR_TIME) += rtc-hid-sensor-time.o
 obj-$(CONFIG_RTC_DRV_HYM8563)	+= rtc-hym8563.o

diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 5fb43989..2ed970d 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c

@@ -150,59 +150,19 @@ static SIMPLE_DEV_PM_OPS(rtc_class_dev_pm_ops, rtc_suspend, rtc_resume);
 #define RTC_CLASS_DEV_PM_OPS	NULL
 #endif
 
-
-/**
- * rtc_device_register - register w/ RTC class
- * @dev: the device to register
- *
- * rtc_device_unregister() must be called when the class device is no
- * longer needed.
- *
- * Returns the pointer to the new struct class device.
- */
-struct rtc_device *rtc_device_register(const char *name, struct device *dev,
-					const struct rtc_class_ops *ops,
-					struct module *owner)
+/* Ensure the caller will set the id before releasing the device */
+static struct rtc_device *rtc_allocate_device(void)
 {
 	struct rtc_device *rtc;
-	struct rtc_wkalrm alrm;
-	int of_id = -1, id = -1, err;
 
-	if (dev->of_node)
-		of_id = of_alias_get_id(dev->of_node, "rtc");
-	else if (dev->parent && dev->parent->of_node)
-		of_id = of_alias_get_id(dev->parent->of_node, "rtc");
-
-	if (of_id >= 0) {
-		id = ida_simple_get(&rtc_ida, of_id, of_id + 1,
-				    GFP_KERNEL);
-		if (id < 0)
-			dev_warn(dev, "/aliases ID %d not available\n",
-				    of_id);
-	}
-
-	if (id < 0) {
-		id = ida_simple_get(&rtc_ida, 0, 0, GFP_KERNEL);
-		if (id < 0) {
-			err = id;
-			goto exit;
-		}
-	}
-
-	rtc = kzalloc(sizeof(struct rtc_device), GFP_KERNEL);
-	if (rtc == NULL) {
-		err = -ENOMEM;
-		goto exit_ida;
-	}
+	rtc = kzalloc(sizeof(*rtc), GFP_KERNEL);
+	if (!rtc)
+		return NULL;
 
 	device_initialize(&rtc->dev);
 
-	rtc->id = id;
-	rtc->ops = ops;
-	rtc->owner = owner;
 	rtc->irq_freq = 1;
 	rtc->max_user_freq = 64;
-	rtc->dev.parent = dev;
 	rtc->dev.class = rtc_class;
 	rtc->dev.groups = rtc_get_dev_attribute_groups();
 	rtc->dev.release = rtc_device_release;
@@ -224,7 +184,64 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 	rtc->pie_timer.function = rtc_pie_update_irq;
 	rtc->pie_enabled = 0;
 
-	strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE);
+	return rtc;
+}
+
+static int rtc_device_get_id(struct device *dev)
+{
+	int of_id = -1, id = -1;
+
+	if (dev->of_node)
+		of_id = of_alias_get_id(dev->of_node, "rtc");
+	else if (dev->parent && dev->parent->of_node)
+		of_id = of_alias_get_id(dev->parent->of_node, "rtc");
+
+	if (of_id >= 0) {
+		id = ida_simple_get(&rtc_ida, of_id, of_id + 1, GFP_KERNEL);
+		if (id < 0)
+			dev_warn(dev, "/aliases ID %d not available\n", of_id);
+	}
+
+	if (id < 0)
+		id = ida_simple_get(&rtc_ida, 0, 0, GFP_KERNEL);
+
+	return id;
+}
+
+/**
+ * rtc_device_register - register w/ RTC class
+ * @dev: the device to register
+ *
+ * rtc_device_unregister() must be called when the class device is no
+ * longer needed.
+ *
+ * Returns the pointer to the new struct rtc_device.
+ */
+struct rtc_device *rtc_device_register(const char *name, struct device *dev,
+					const struct rtc_class_ops *ops,
+					struct module *owner)
+{
+	struct rtc_device *rtc;
+	struct rtc_wkalrm alrm;
+	int id, err;
+
+	id = rtc_device_get_id(dev);
+	if (id < 0) {
+		err = id;
+		goto exit;
+	}
+
+	rtc = rtc_allocate_device();
+	if (!rtc) {
+		err = -ENOMEM;
+		goto exit_ida;
+	}
+
+	rtc->id = id;
+	rtc->ops = ops;
+	rtc->owner = owner;
+	rtc->dev.parent = dev;
+
 	dev_set_name(&rtc->dev, "rtc%d", id);
 
 	/* Check to see if there is an ALARM already set in hw */
@@ -238,20 +255,20 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 	err = cdev_device_add(&rtc->char_dev, &rtc->dev);
 	if (err) {
 		dev_warn(&rtc->dev, "%s: failed to add char device %d:%d\n",
-			 rtc->name, MAJOR(rtc->dev.devt), rtc->id);
+			 name, MAJOR(rtc->dev.devt), rtc->id);
 
 		/* This will free both memory and the ID */
 		put_device(&rtc->dev);
 		goto exit;
 	} else {
-		dev_dbg(&rtc->dev, "%s: dev (%d:%d)\n", rtc->name,
+		dev_dbg(&rtc->dev, "%s: dev (%d:%d)\n", name,
 			MAJOR(rtc->dev.devt), rtc->id);
 	}
 
 	rtc_proc_add_device(rtc);
 
 	dev_info(dev, "rtc core: registered %s as %s\n",
-			rtc->name, dev_name(&rtc->dev));
+			name, dev_name(&rtc->dev));
 
 	return rtc;
 
@@ -273,6 +290,8 @@ EXPORT_SYMBOL_GPL(rtc_device_register);
  */
 void rtc_device_unregister(struct rtc_device *rtc)
 {
+	rtc_nvmem_unregister(rtc);
+
 	mutex_lock(&rtc->ops_lock);
 	/*
 	 * Remove innards of this RTC, then disable it, before
@@ -356,6 +375,91 @@ void devm_rtc_device_unregister(struct device *dev, struct rtc_device *rtc)
 }
 EXPORT_SYMBOL_GPL(devm_rtc_device_unregister);
 
+static void devm_rtc_release_device(struct device *dev, void *res)
+{
+	struct rtc_device *rtc = *(struct rtc_device **)res;
+
+	if (rtc->registered)
+		rtc_device_unregister(rtc);
+	else
+		put_device(&rtc->dev);
+}
+
+struct rtc_device *devm_rtc_allocate_device(struct device *dev)
+{
+	struct rtc_device **ptr, *rtc;
+	int id, err;
+
+	id = rtc_device_get_id(dev);
+	if (id < 0)
+		return ERR_PTR(id);
+
+	ptr = devres_alloc(devm_rtc_release_device, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr) {
+		err = -ENOMEM;
+		goto exit_ida;
+	}
+
+	rtc = rtc_allocate_device();
+	if (!rtc) {
+		err = -ENOMEM;
+		goto exit_devres;
+	}
+
+	*ptr = rtc;
+	devres_add(dev, ptr);
+
+	rtc->id = id;
+	rtc->dev.parent = dev;
+	dev_set_name(&rtc->dev, "rtc%d", id);
+
+	return rtc;
+
+exit_devres:
+	devres_free(ptr);
+exit_ida:
+	ida_simple_remove(&rtc_ida, id);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(devm_rtc_allocate_device);
+
+int __rtc_register_device(struct module *owner, struct rtc_device *rtc)
+{
+	struct rtc_wkalrm alrm;
+	int err;
+
+	if (!rtc->ops)
+		return -EINVAL;
+
+	rtc->owner = owner;
+
+	/* Check to see if there is an ALARM already set in hw */
+	err = __rtc_read_alarm(rtc, &alrm);
+	if (!err && !rtc_valid_tm(&alrm.time))
+		rtc_initialize_alarm(rtc, &alrm);
+
+	rtc_dev_prepare(rtc);
+
+	err = cdev_device_add(&rtc->char_dev, &rtc->dev);
+	if (err)
+		dev_warn(rtc->dev.parent, "failed to add char device %d:%d\n",
+			 MAJOR(rtc->dev.devt), rtc->id);
+	else
+		dev_dbg(rtc->dev.parent, "char device (%d:%d)\n",
+			MAJOR(rtc->dev.devt), rtc->id);
+
+	rtc_proc_add_device(rtc);
+
+	rtc_nvmem_register(rtc);
+
+	rtc->registered = true;
+	dev_info(rtc->dev.parent, "registered as %s\n",
+		 dev_name(&rtc->dev));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__rtc_register_device);
+
 static int __init rtc_init(void)
 {
 	rtc_class = class_create(THIS_MODULE, "rtc");

diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index fc0fa75..8cec9a0 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c

@@ -227,6 +227,13 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 			missing = year;
 	}
 
+	/* Can't proceed if alarm is still invalid after replacing
+	 * missing fields.
+	 */
+	err = rtc_valid_tm(&alarm->time);
+	if (err)
+		goto done;
+
 	/* with luck, no rollover is needed */
 	t_now = rtc_tm_to_time64(&now);
 	t_alm = rtc_tm_to_time64(&alarm->time);
@@ -278,9 +285,9 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 		dev_warn(&rtc->dev, "alarm rollover not handled\n");
 	}
 
-done:
 	err = rtc_valid_tm(&alarm->time);
 
+done:
 	if (err) {
 		dev_warn(&rtc->dev, "invalid alarm value: %d-%d-%d %d:%d:%d\n",
 			alarm->time.tm_year + 1900, alarm->time.tm_mon + 1,

diff --git a/drivers/rtc/nvmem.c b/drivers/rtc/nvmem.c
new file mode 100644
index 0000000..8567b4e
--- /dev/null
+++ b/drivers/rtc/nvmem.c

@@ -0,0 +1,113 @@
+/*
+ * RTC subsystem, nvmem interface
+ *
+ * Copyright (C) 2017 Alexandre Belloni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/types.h>
+#include <linux/nvmem-consumer.h>
+#include <linux/rtc.h>
+#include <linux/sysfs.h>
+
+#include "rtc-core.h"
+
+/*
+ * Deprecated ABI compatibility, this should be removed at some point
+ */
+
+static const char nvram_warning[] = "Deprecated ABI, please use nvmem";
+
+static ssize_t
+rtc_nvram_read(struct file *filp, struct kobject *kobj,
+	       struct bin_attribute *attr,
+	       char *buf, loff_t off, size_t count)
+{
+	struct rtc_device *rtc = attr->private;
+
+	dev_warn_once(kobj_to_dev(kobj), nvram_warning);
+
+	return nvmem_device_read(rtc->nvmem, off, count, buf);
+}
+
+static ssize_t
+rtc_nvram_write(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
+		char *buf, loff_t off, size_t count)
+{
+	struct rtc_device *rtc = attr->private;
+
+	dev_warn_once(kobj_to_dev(kobj), nvram_warning);
+
+	return nvmem_device_write(rtc->nvmem, off, count, buf);
+}
+
+static int rtc_nvram_register(struct rtc_device *rtc)
+{
+	int err;
+
+	rtc->nvram = devm_kzalloc(rtc->dev.parent,
+				sizeof(struct bin_attribute),
+				GFP_KERNEL);
+	if (!rtc->nvram)
+		return -ENOMEM;
+
+	rtc->nvram->attr.name = "nvram";
+	rtc->nvram->attr.mode = 0644;
+	rtc->nvram->private = rtc;
+
+	sysfs_bin_attr_init(rtc->nvram);
+
+	rtc->nvram->read = rtc_nvram_read;
+	rtc->nvram->write = rtc_nvram_write;
+	rtc->nvram->size = rtc->nvmem_config->size;
+
+	err = sysfs_create_bin_file(&rtc->dev.parent->kobj,
+				    rtc->nvram);
+	if (err) {
+		devm_kfree(rtc->dev.parent, rtc->nvram);
+		rtc->nvram = NULL;
+	}
+
+	return err;
+}
+
+static void rtc_nvram_unregister(struct rtc_device *rtc)
+{
+	sysfs_remove_bin_file(&rtc->dev.parent->kobj, rtc->nvram);
+}
+
+/*
+ * New ABI, uses nvmem
+ */
+void rtc_nvmem_register(struct rtc_device *rtc)
+{
+	if (!rtc->nvmem_config)
+		return;
+
+	rtc->nvmem_config->dev = &rtc->dev;
+	rtc->nvmem_config->owner = rtc->owner;
+	rtc->nvmem = nvmem_register(rtc->nvmem_config);
+	if (IS_ERR_OR_NULL(rtc->nvmem))
+		return;
+
+	/* Register the old ABI */
+	if (rtc->nvram_old_abi)
+		rtc_nvram_register(rtc);
+}
+
+void rtc_nvmem_unregister(struct rtc_device *rtc)
+{
+	if (IS_ERR_OR_NULL(rtc->nvmem))
+		return;
+
+	/* unregister the old ABI */
+	if (rtc->nvram)
+		rtc_nvram_unregister(rtc);
+
+	nvmem_unregister(rtc->nvmem);
+}

diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index b60fd47..e221b78 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c

@@ -409,6 +409,11 @@ static int __init at91_rtc_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
+	rtc = devm_rtc_allocate_device(&pdev->dev);
+	if (IS_ERR(rtc))
+		return PTR_ERR(rtc);
+	platform_set_drvdata(pdev, rtc);
+
 	sclk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(sclk))
 		return PTR_ERR(sclk);
@@ -441,13 +446,10 @@ static int __init at91_rtc_probe(struct platform_device *pdev)
 	if (!device_can_wakeup(&pdev->dev))
 		device_init_wakeup(&pdev->dev, 1);
 
-	rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-				&at91_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rtc)) {
-		ret = PTR_ERR(rtc);
+	rtc->ops = &at91_rtc_ops;
+	ret = rtc_register_device(rtc);
+	if (ret)
 		goto err_clk;
-	}
-	platform_set_drvdata(pdev, rtc);
 
 	/* enable SECEV interrupt in order to initialize at91_rtc_upd_rdy
 	 * completion.

diff --git a/drivers/rtc/rtc-brcmstb-waketimer.c b/drivers/rtc/rtc-brcmstb-waketimer.c
new file mode 100644
index 0000000..796ac79
--- /dev/null
+++ b/drivers/rtc/rtc-brcmstb-waketimer.c

@@ -0,0 +1,330 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irqreturn.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/pm_wakeup.h>
+#include <linux/reboot.h>
+#include <linux/rtc.h>
+#include <linux/stat.h>
+#include <linux/suspend.h>
+
+struct brcmstb_waketmr {
+	struct rtc_device *rtc;
+	struct device *dev;
+	void __iomem *base;
+	int irq;
+	struct notifier_block reboot_notifier;
+	struct clk *clk;
+	u32 rate;
+};
+
+#define BRCMSTB_WKTMR_EVENT		0x00
+#define BRCMSTB_WKTMR_COUNTER		0x04
+#define BRCMSTB_WKTMR_ALARM		0x08
+#define BRCMSTB_WKTMR_PRESCALER		0x0C
+#define BRCMSTB_WKTMR_PRESCALER_VAL	0x10
+
+#define BRCMSTB_WKTMR_DEFAULT_FREQ	27000000
+
+static inline void brcmstb_waketmr_clear_alarm(struct brcmstb_waketmr *timer)
+{
+	writel_relaxed(1, timer->base + BRCMSTB_WKTMR_EVENT);
+	(void)readl_relaxed(timer->base + BRCMSTB_WKTMR_EVENT);
+}
+
+static void brcmstb_waketmr_set_alarm(struct brcmstb_waketmr *timer,
+				      unsigned int secs)
+{
+	brcmstb_waketmr_clear_alarm(timer);
+
+	writel_relaxed(secs + 1, timer->base + BRCMSTB_WKTMR_ALARM);
+}
+
+static irqreturn_t brcmstb_waketmr_irq(int irq, void *data)
+{
+	struct brcmstb_waketmr *timer = data;
+
+	pm_wakeup_event(timer->dev, 0);
+
+	return IRQ_HANDLED;
+}
+
+struct wktmr_time {
+	u32 sec;
+	u32 pre;
+};
+
+static void wktmr_read(struct brcmstb_waketmr *timer,
+		       struct wktmr_time *t)
+{
+	u32 tmp;
+
+	do {
+		t->sec = readl_relaxed(timer->base + BRCMSTB_WKTMR_COUNTER);
+		tmp = readl_relaxed(timer->base + BRCMSTB_WKTMR_PRESCALER_VAL);
+	} while (tmp >= timer->rate);
+
+	t->pre = timer->rate - tmp;
+}
+
+static int brcmstb_waketmr_prepare_suspend(struct brcmstb_waketmr *timer)
+{
+	struct device *dev = timer->dev;
+	int ret = 0;
+
+	if (device_may_wakeup(dev)) {
+		ret = enable_irq_wake(timer->irq);
+		if (ret) {
+			dev_err(dev, "failed to enable wake-up interrupt\n");
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+/* If enabled as a wakeup-source, arm the timer when powering off */
+static int brcmstb_waketmr_reboot(struct notifier_block *nb,
+		unsigned long action, void *data)
+{
+	struct brcmstb_waketmr *timer;
+
+	timer = container_of(nb, struct brcmstb_waketmr, reboot_notifier);
+
+	/* Set timer for cold boot */
+	if (action == SYS_POWER_OFF)
+		brcmstb_waketmr_prepare_suspend(timer);
+
+	return NOTIFY_DONE;
+}
+
+static int brcmstb_waketmr_gettime(struct device *dev,
+				   struct rtc_time *tm)
+{
+	struct brcmstb_waketmr *timer = dev_get_drvdata(dev);
+	struct wktmr_time now;
+
+	wktmr_read(timer, &now);
+
+	rtc_time_to_tm(now.sec, tm);
+
+	return 0;
+}
+
+static int brcmstb_waketmr_settime(struct device *dev,
+				   struct rtc_time *tm)
+{
+	struct brcmstb_waketmr *timer = dev_get_drvdata(dev);
+	time64_t sec;
+
+	sec = rtc_tm_to_time64(tm);
+
+	if (sec > U32_MAX || sec < 0)
+		return -EINVAL;
+
+	writel_relaxed(sec, timer->base + BRCMSTB_WKTMR_COUNTER);
+
+	return 0;
+}
+
+static int brcmstb_waketmr_getalarm(struct device *dev,
+				    struct rtc_wkalrm *alarm)
+{
+	struct brcmstb_waketmr *timer = dev_get_drvdata(dev);
+	time64_t sec;
+	u32 reg;
+
+	sec = readl_relaxed(timer->base + BRCMSTB_WKTMR_ALARM);
+	if (sec != 0) {
+		/* Alarm is enabled */
+		alarm->enabled = 1;
+		rtc_time64_to_tm(sec, &alarm->time);
+	}
+
+	reg = readl_relaxed(timer->base + BRCMSTB_WKTMR_EVENT);
+	alarm->pending = !!(reg & 1);
+
+	return 0;
+}
+
+static int brcmstb_waketmr_setalarm(struct device *dev,
+				     struct rtc_wkalrm *alarm)
+{
+	struct brcmstb_waketmr *timer = dev_get_drvdata(dev);
+	time64_t sec;
+
+	if (alarm->enabled)
+		sec = rtc_tm_to_time64(&alarm->time);
+	else
+		sec = 0;
+
+	if (sec > U32_MAX || sec < 0)
+		return -EINVAL;
+
+	brcmstb_waketmr_set_alarm(timer, sec);
+
+	return 0;
+}
+
+/*
+ * Does not do much but keep the RTC class happy. We always support
+ * alarms.
+ */
+static int brcmstb_waketmr_alarm_enable(struct device *dev,
+					unsigned int enabled)
+{
+	return 0;
+}
+
+static const struct rtc_class_ops brcmstb_waketmr_ops = {
+	.read_time	= brcmstb_waketmr_gettime,
+	.set_time	= brcmstb_waketmr_settime,
+	.read_alarm	= brcmstb_waketmr_getalarm,
+	.set_alarm	= brcmstb_waketmr_setalarm,
+	.alarm_irq_enable = brcmstb_waketmr_alarm_enable,
+};
+
+static int brcmstb_waketmr_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct brcmstb_waketmr *timer;
+	struct resource *res;
+	int ret;
+
+	timer = devm_kzalloc(dev, sizeof(*timer), GFP_KERNEL);
+	if (!timer)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, timer);
+	timer->dev = dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	timer->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(timer->base))
+		return PTR_ERR(timer->base);
+
+	/*
+	 * Set wakeup capability before requesting wakeup interrupt, so we can
+	 * process boot-time "wakeups" (e.g., from S5 soft-off)
+	 */
+	device_set_wakeup_capable(dev, true);
+	device_wakeup_enable(dev);
+
+	timer->irq = platform_get_irq(pdev, 0);
+	if (timer->irq < 0)
+		return -ENODEV;
+
+	timer->clk = devm_clk_get(dev, NULL);
+	if (!IS_ERR(timer->clk)) {
+		ret = clk_prepare_enable(timer->clk);
+		if (ret)
+			return ret;
+		timer->rate = clk_get_rate(timer->clk);
+		if (!timer->rate)
+			timer->rate = BRCMSTB_WKTMR_DEFAULT_FREQ;
+	} else {
+		timer->rate = BRCMSTB_WKTMR_DEFAULT_FREQ;
+		timer->clk = NULL;
+	}
+
+	ret = devm_request_irq(dev, timer->irq, brcmstb_waketmr_irq, 0,
+			       "brcmstb-waketimer", timer);
+	if (ret < 0)
+		return ret;
+
+	timer->reboot_notifier.notifier_call = brcmstb_waketmr_reboot;
+	register_reboot_notifier(&timer->reboot_notifier);
+
+	timer->rtc = rtc_device_register("brcmstb-waketmr", dev,
+					 &brcmstb_waketmr_ops, THIS_MODULE);
+	if (IS_ERR(timer->rtc)) {
+		dev_err(dev, "unable to register device\n");
+		unregister_reboot_notifier(&timer->reboot_notifier);
+		return PTR_ERR(timer->rtc);
+	}
+
+	dev_info(dev, "registered, with irq %d\n", timer->irq);
+
+	return ret;
+}
+
+static int brcmstb_waketmr_remove(struct platform_device *pdev)
+{
+	struct brcmstb_waketmr *timer = dev_get_drvdata(&pdev->dev);
+
+	unregister_reboot_notifier(&timer->reboot_notifier);
+	rtc_device_unregister(timer->rtc);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int brcmstb_waketmr_suspend(struct device *dev)
+{
+	struct brcmstb_waketmr *timer = dev_get_drvdata(dev);
+
+	return brcmstb_waketmr_prepare_suspend(timer);
+}
+
+static int brcmstb_waketmr_resume(struct device *dev)
+{
+	struct brcmstb_waketmr *timer = dev_get_drvdata(dev);
+	int ret;
+
+	if (!device_may_wakeup(dev))
+		return 0;
+
+	ret = disable_irq_wake(timer->irq);
+
+	brcmstb_waketmr_clear_alarm(timer);
+
+	return ret;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(brcmstb_waketmr_pm_ops,
+			 brcmstb_waketmr_suspend, brcmstb_waketmr_resume);
+
+static const struct of_device_id brcmstb_waketmr_of_match[] = {
+	{ .compatible = "brcm,brcmstb-waketimer" },
+	{ /* sentinel */ },
+};
+
+static struct platform_driver brcmstb_waketmr_driver = {
+	.probe			= brcmstb_waketmr_probe,
+	.remove			= brcmstb_waketmr_remove,
+	.driver = {
+		.name		= "brcmstb-waketimer",
+		.pm		= &brcmstb_waketmr_pm_ops,
+		.of_match_table	= of_match_ptr(brcmstb_waketmr_of_match),
+	}
+};
+module_platform_driver(brcmstb_waketmr_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Brian Norris");
+MODULE_AUTHOR("Markus Mayer");
+MODULE_DESCRIPTION("Wake-up timer driver for STB chips");

diff --git a/drivers/rtc/rtc-core.h b/drivers/rtc/rtc-core.h
index 7a4ed2f..ecab76a 100644
--- a/drivers/rtc/rtc-core.h
+++ b/drivers/rtc/rtc-core.h

@@ -45,3 +45,11 @@ static inline const struct attribute_group **rtc_get_dev_attribute_groups(void)
 	return NULL;
 }
 #endif
+
+#ifdef CONFIG_RTC_NVMEM
+void rtc_nvmem_register(struct rtc_device *rtc);
+void rtc_nvmem_unregister(struct rtc_device *rtc);
+#else
+static inline void rtc_nvmem_register(struct rtc_device *rtc) {}
+static inline void rtc_nvmem_unregister(struct rtc_device *rtc) {}
+#endif

diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index e81a871..794bc4f 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c

@@ -464,7 +464,7 @@ void rtc_dev_prepare(struct rtc_device *rtc)
 		return;
 
 	if (rtc->id >= RTC_DEV_MAX) {
-		dev_dbg(&rtc->dev, "%s: too many RTC devices\n", rtc->name);
+		dev_dbg(&rtc->dev, "too many RTC devices\n");
 		return;
 	}
 

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 77339b3..4fac49e 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c

@@ -24,6 +24,7 @@
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/clk-provider.h>
+#include <linux/regmap.h>
 
 /*
  * We can't determine type by probing, but if we expect pre-Linux code
@@ -33,6 +34,7 @@
  */
 enum ds_type {
 	ds_1307,
+	ds_1308,
 	ds_1337,
 	ds_1338,
 	ds_1339,
@@ -43,6 +45,7 @@ enum ds_type {
 	m41t00,
 	mcp794xx,
 	rx_8025,
+	rx_8130,
 	last_ds_type /* always last */
 	/* rs5c372 too?  different address... */
 };
@@ -115,17 +118,16 @@ struct ds1307 {
 	u8			offset; /* register's offset */
 	u8			regs[11];
 	u16			nvram_offset;
-	struct bin_attribute	*nvram;
+	struct nvmem_config	nvmem_cfg;
 	enum ds_type		type;
 	unsigned long		flags;
 #define HAS_NVRAM	0		/* bit 0 == sysfs file active */
 #define HAS_ALARM	1		/* bit 1 == irq claimed */
-	struct i2c_client	*client;
+	struct device		*dev;
+	struct regmap		*regmap;
+	const char		*name;
+	int			irq;
 	struct rtc_device	*rtc;
-	s32 (*read_block_data)(const struct i2c_client *client, u8 command,
-			       u8 length, u8 *values);
-	s32 (*write_block_data)(const struct i2c_client *client, u8 command,
-				u8 length, const u8 *values);
 #ifdef CONFIG_COMMON_CLK
 	struct clk_hw		clks[2];
 #endif
@@ -135,21 +137,30 @@ struct chip_desc {
 	unsigned		alarm:1;
 	u16			nvram_offset;
 	u16			nvram_size;
+	u8			century_reg;
+	u8			century_enable_bit;
+	u8			century_bit;
 	u16			trickle_charger_reg;
 	u8			trickle_charger_setup;
-	u8			(*do_trickle_setup)(struct i2c_client *, uint32_t, bool);
+	u8			(*do_trickle_setup)(struct ds1307 *, uint32_t,
+						    bool);
 };
 
-static u8 do_trickle_setup_ds1339(struct i2c_client *,
-				  uint32_t ohms, bool diode);
+static u8 do_trickle_setup_ds1339(struct ds1307 *, uint32_t ohms, bool diode);
 
 static struct chip_desc chips[last_ds_type] = {
 	[ds_1307] = {
 		.nvram_offset	= 8,
 		.nvram_size	= 56,
 	},
+	[ds_1308] = {
+		.nvram_offset	= 8,
+		.nvram_size	= 56,
+	},
 	[ds_1337] = {
 		.alarm		= 1,
+		.century_reg	= DS1307_REG_MONTH,
+		.century_bit	= DS1337_BIT_CENTURY,
 	},
 	[ds_1338] = {
 		.nvram_offset	= 8,
@@ -157,10 +168,15 @@ static struct chip_desc chips[last_ds_type] = {
 	},
 	[ds_1339] = {
 		.alarm		= 1,
+		.century_reg	= DS1307_REG_MONTH,
+		.century_bit	= DS1337_BIT_CENTURY,
 		.trickle_charger_reg = 0x10,
 		.do_trickle_setup = &do_trickle_setup_ds1339,
 	},
 	[ds_1340] = {
+		.century_reg	= DS1307_REG_HOUR,
+		.century_enable_bit = DS1340_BIT_CENTURY_EN,
+		.century_bit	= DS1340_BIT_CENTURY,
 		.trickle_charger_reg = 0x08,
 	},
 	[ds_1388] = {
@@ -168,6 +184,14 @@ static struct chip_desc chips[last_ds_type] = {
 	},
 	[ds_3231] = {
 		.alarm		= 1,
+		.century_reg	= DS1307_REG_MONTH,
+		.century_bit	= DS1337_BIT_CENTURY,
+	},
+	[rx_8130] = {
+		.alarm		= 1,
+		/* this is battery backed SRAM */
+		.nvram_offset	= 0x20,
+		.nvram_size	= 4,	/* 32bit (4 word x 8 bit) */
 	},
 	[mcp794xx] = {
 		.alarm		= 1,
@@ -179,6 +203,7 @@ static struct chip_desc chips[last_ds_type] = {
 
 static const struct i2c_device_id ds1307_id[] = {
 	{ "ds1307", ds_1307 },
+	{ "ds1308", ds_1308 },
 	{ "ds1337", ds_1337 },
 	{ "ds1338", ds_1338 },
 	{ "ds1339", ds_1339 },
@@ -192,6 +217,7 @@ static const struct i2c_device_id ds1307_id[] = {
 	{ "pt7c4338", ds_1307 },
 	{ "rx8025", rx_8025 },
 	{ "isl12057", ds_1337 },
+	{ "rx8130", rx_8130 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, ds1307_id);
@@ -203,6 +229,10 @@ static const struct of_device_id ds1307_of_match[] = {
 		.data = (void *)ds_1307
 	},
 	{
+		.compatible = "dallas,ds1308",
+		.data = (void *)ds_1308
+	},
+	{
 		.compatible = "dallas,ds1337",
 		.data = (void *)ds_1337
 	},
@@ -262,6 +292,7 @@ MODULE_DEVICE_TABLE(of, ds1307_of_match);
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id ds1307_acpi_ids[] = {
 	{ .id = "DS1307", .driver_data = ds_1307 },
+	{ .id = "DS1308", .driver_data = ds_1308 },
 	{ .id = "DS1337", .driver_data = ds_1337 },
 	{ .id = "DS1338", .driver_data = ds_1338 },
 	{ .id = "DS1339", .driver_data = ds_1339 },
@@ -280,136 +311,6 @@ static const struct acpi_device_id ds1307_acpi_ids[] = {
 MODULE_DEVICE_TABLE(acpi, ds1307_acpi_ids);
 #endif
 
-/*----------------------------------------------------------------------*/
-
-#define BLOCK_DATA_MAX_TRIES 10
-
-static s32 ds1307_read_block_data_once(const struct i2c_client *client,
-				       u8 command, u8 length, u8 *values)
-{
-	s32 i, data;
-
-	for (i = 0; i < length; i++) {
-		data = i2c_smbus_read_byte_data(client, command + i);
-		if (data < 0)
-			return data;
-		values[i] = data;
-	}
-	return i;
-}
-
-static s32 ds1307_read_block_data(const struct i2c_client *client, u8 command,
-				  u8 length, u8 *values)
-{
-	u8 oldvalues[255];
-	s32 ret;
-	int tries = 0;
-
-	dev_dbg(&client->dev, "ds1307_read_block_data (length=%d)\n", length);
-	ret = ds1307_read_block_data_once(client, command, length, values);
-	if (ret < 0)
-		return ret;
-	do {
-		if (++tries > BLOCK_DATA_MAX_TRIES) {
-			dev_err(&client->dev,
-				"ds1307_read_block_data failed\n");
-			return -EIO;
-		}
-		memcpy(oldvalues, values, length);
-		ret = ds1307_read_block_data_once(client, command, length,
-						  values);
-		if (ret < 0)
-			return ret;
-	} while (memcmp(oldvalues, values, length));
-	return length;
-}
-
-static s32 ds1307_write_block_data(const struct i2c_client *client, u8 command,
-				   u8 length, const u8 *values)
-{
-	u8 currvalues[255];
-	int tries = 0;
-
-	dev_dbg(&client->dev, "ds1307_write_block_data (length=%d)\n", length);
-	do {
-		s32 i, ret;
-
-		if (++tries > BLOCK_DATA_MAX_TRIES) {
-			dev_err(&client->dev,
-				"ds1307_write_block_data failed\n");
-			return -EIO;
-		}
-		for (i = 0; i < length; i++) {
-			ret = i2c_smbus_write_byte_data(client, command + i,
-							values[i]);
-			if (ret < 0)
-				return ret;
-		}
-		ret = ds1307_read_block_data_once(client, command, length,
-						  currvalues);
-		if (ret < 0)
-			return ret;
-	} while (memcmp(currvalues, values, length));
-	return length;
-}
-
-/*----------------------------------------------------------------------*/
-
-/* These RTC devices are not designed to be connected to a SMbus adapter.
-   SMbus limits block operations length to 32 bytes, whereas it's not
-   limited on I2C buses. As a result, accesses may exceed 32 bytes;
-   in that case, split them into smaller blocks */
-
-static s32 ds1307_native_smbus_write_block_data(const struct i2c_client *client,
-				u8 command, u8 length, const u8 *values)
-{
-	u8 suboffset = 0;
-
-	if (length <= I2C_SMBUS_BLOCK_MAX) {
-		s32 retval = i2c_smbus_write_i2c_block_data(client,
-					command, length, values);
-		if (retval < 0)
-			return retval;
-		return length;
-	}
-
-	while (suboffset < length) {
-		s32 retval = i2c_smbus_write_i2c_block_data(client,
-				command + suboffset,
-				min(I2C_SMBUS_BLOCK_MAX, length - suboffset),
-				values + suboffset);
-		if (retval < 0)
-			return retval;
-
-		suboffset += I2C_SMBUS_BLOCK_MAX;
-	}
-	return length;
-}
-
-static s32 ds1307_native_smbus_read_block_data(const struct i2c_client *client,
-				u8 command, u8 length, u8 *values)
-{
-	u8 suboffset = 0;
-
-	if (length <= I2C_SMBUS_BLOCK_MAX)
-		return i2c_smbus_read_i2c_block_data(client,
-					command, length, values);
-
-	while (suboffset < length) {
-		s32 retval = i2c_smbus_read_i2c_block_data(client,
-				command + suboffset,
-				min(I2C_SMBUS_BLOCK_MAX, length - suboffset),
-				values + suboffset);
-		if (retval < 0)
-			return retval;
-
-		suboffset += I2C_SMBUS_BLOCK_MAX;
-	}
-	return length;
-}
-
-/*----------------------------------------------------------------------*/
-
 /*
  * The ds1337 and ds1339 both have two alarms, but we only use the first
  * one (with a "seconds" field).  For ds1337 we expect nINTA is our alarm
@@ -417,27 +318,24 @@ static s32 ds1307_native_smbus_read_block_data(const struct i2c_client *client,
  */
 static irqreturn_t ds1307_irq(int irq, void *dev_id)
 {
-	struct i2c_client	*client = dev_id;
-	struct ds1307		*ds1307 = i2c_get_clientdata(client);
+	struct ds1307		*ds1307 = dev_id;
 	struct mutex		*lock = &ds1307->rtc->ops_lock;
-	int			stat, control;
+	int			stat, ret;
 
 	mutex_lock(lock);
-	stat = i2c_smbus_read_byte_data(client, DS1337_REG_STATUS);
-	if (stat < 0)
+	ret = regmap_read(ds1307->regmap, DS1337_REG_STATUS, &stat);
+	if (ret)
 		goto out;
 
 	if (stat & DS1337_BIT_A1I) {
 		stat &= ~DS1337_BIT_A1I;
-		i2c_smbus_write_byte_data(client, DS1337_REG_STATUS, stat);
+		regmap_write(ds1307->regmap, DS1337_REG_STATUS, stat);
 
-		control = i2c_smbus_read_byte_data(client, DS1337_REG_CONTROL);
-		if (control < 0)
+		ret = regmap_update_bits(ds1307->regmap, DS1337_REG_CONTROL,
+					 DS1337_BIT_A1IE, 0);
+		if (ret)
 			goto out;
 
-		control &= ~DS1337_BIT_A1IE;
-		i2c_smbus_write_byte_data(client, DS1337_REG_CONTROL, control);
-
 		rtc_update_irq(ds1307->rtc, 1, RTC_AF | RTC_IRQF);
 	}
 
@@ -452,14 +350,14 @@ static irqreturn_t ds1307_irq(int irq, void *dev_id)
 static int ds1307_get_time(struct device *dev, struct rtc_time *t)
 {
 	struct ds1307	*ds1307 = dev_get_drvdata(dev);
-	int		tmp;
+	int		tmp, ret;
+	const struct chip_desc *chip = &chips[ds1307->type];
 
 	/* read the RTC date and time registers all at once */
-	tmp = ds1307->read_block_data(ds1307->client,
-		ds1307->offset, 7, ds1307->regs);
-	if (tmp != 7) {
-		dev_err(dev, "%s error %d\n", "read", tmp);
-		return -EIO;
+	ret = regmap_bulk_read(ds1307->regmap, ds1307->offset, ds1307->regs, 7);
+	if (ret) {
+		dev_err(dev, "%s error %d\n", "read", ret);
+		return ret;
 	}
 
 	dev_dbg(dev, "%s: %7ph\n", "read", ds1307->regs);
@@ -481,22 +379,9 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
 	t->tm_mon = bcd2bin(tmp) - 1;
 	t->tm_year = bcd2bin(ds1307->regs[DS1307_REG_YEAR]) + 100;
 
-#ifdef CONFIG_RTC_DRV_DS1307_CENTURY
-	switch (ds1307->type) {
-	case ds_1337:
-	case ds_1339:
-	case ds_3231:
-		if (ds1307->regs[DS1307_REG_MONTH] & DS1337_BIT_CENTURY)
-			t->tm_year += 100;
-		break;
-	case ds_1340:
-		if (ds1307->regs[DS1307_REG_HOUR] & DS1340_BIT_CENTURY)
-			t->tm_year += 100;
-		break;
-	default:
-		break;
-	}
-#endif
+	if (ds1307->regs[chip->century_reg] & chip->century_bit &&
+	    IS_ENABLED(CONFIG_RTC_DRV_DS1307_CENTURY))
+		t->tm_year += 100;
 
 	dev_dbg(dev, "%s secs=%d, mins=%d, "
 		"hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -511,6 +396,7 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
 static int ds1307_set_time(struct device *dev, struct rtc_time *t)
 {
 	struct ds1307	*ds1307 = dev_get_drvdata(dev);
+	const struct chip_desc *chip = &chips[ds1307->type];
 	int		result;
 	int		tmp;
 	u8		*buf = ds1307->regs;
@@ -521,24 +407,14 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
 		t->tm_hour, t->tm_mday,
 		t->tm_mon, t->tm_year, t->tm_wday);
 
-#ifdef CONFIG_RTC_DRV_DS1307_CENTURY
 	if (t->tm_year < 100)
 		return -EINVAL;
 
-	switch (ds1307->type) {
-	case ds_1337:
-	case ds_1339:
-	case ds_3231:
-	case ds_1340:
-		if (t->tm_year > 299)
-			return -EINVAL;
-	default:
-		if (t->tm_year > 199)
-			return -EINVAL;
-		break;
-	}
+#ifdef CONFIG_RTC_DRV_DS1307_CENTURY
+	if (t->tm_year > (chip->century_bit ? 299 : 199))
+		return -EINVAL;
 #else
-	if (t->tm_year < 100 || t->tm_year > 199)
+	if (t->tm_year > 199)
 		return -EINVAL;
 #endif
 
@@ -553,19 +429,12 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
 	tmp = t->tm_year - 100;
 	buf[DS1307_REG_YEAR] = bin2bcd(tmp);
 
-	switch (ds1307->type) {
-	case ds_1337:
-	case ds_1339:
-	case ds_3231:
-		if (t->tm_year > 199)
-			buf[DS1307_REG_MONTH] |= DS1337_BIT_CENTURY;
-		break;
-	case ds_1340:
-		buf[DS1307_REG_HOUR] |= DS1340_BIT_CENTURY_EN;
-		if (t->tm_year > 199)
-			buf[DS1307_REG_HOUR] |= DS1340_BIT_CENTURY;
-		break;
-	case mcp794xx:
+	if (chip->century_enable_bit)
+		buf[chip->century_reg] |= chip->century_enable_bit;
+	if (t->tm_year > 199 && chip->century_bit)
+		buf[chip->century_reg] |= chip->century_bit;
+
+	if (ds1307->type == mcp794xx) {
 		/*
 		 * these bits were cleared when preparing the date/time
 		 * values and need to be set again before writing the
@@ -573,16 +442,12 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
 		 */
 		buf[DS1307_REG_SECS] |= MCP794XX_BIT_ST;
 		buf[DS1307_REG_WDAY] |= MCP794XX_BIT_VBATEN;
-		break;
-	default:
-		break;
 	}
 
 	dev_dbg(dev, "%s: %7ph\n", "write", buf);
 
-	result = ds1307->write_block_data(ds1307->client,
-		ds1307->offset, 7, buf);
-	if (result < 0) {
+	result = regmap_bulk_write(ds1307->regmap, ds1307->offset, buf, 7);
+	if (result) {
 		dev_err(dev, "%s error %d\n", "write", result);
 		return result;
 	}
@@ -591,19 +456,18 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
 
 static int ds1337_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
-	struct i2c_client       *client = to_i2c_client(dev);
-	struct ds1307		*ds1307 = i2c_get_clientdata(client);
+	struct ds1307		*ds1307 = dev_get_drvdata(dev);
 	int			ret;
 
 	if (!test_bit(HAS_ALARM, &ds1307->flags))
 		return -EINVAL;
 
 	/* read all ALARM1, ALARM2, and status registers at once */
-	ret = ds1307->read_block_data(client,
-			DS1339_REG_ALARM1_SECS, 9, ds1307->regs);
-	if (ret != 9) {
+	ret = regmap_bulk_read(ds1307->regmap, DS1339_REG_ALARM1_SECS,
+			       ds1307->regs, 9);
+	if (ret) {
 		dev_err(dev, "%s error %d\n", "alarm read", ret);
-		return -EIO;
+		return ret;
 	}
 
 	dev_dbg(dev, "%s: %4ph, %3ph, %2ph\n", "alarm read",
@@ -633,8 +497,7 @@ static int ds1337_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 
 static int ds1337_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
-	struct i2c_client	*client = to_i2c_client(dev);
-	struct ds1307		*ds1307 = i2c_get_clientdata(client);
+	struct ds1307		*ds1307 = dev_get_drvdata(dev);
 	unsigned char		*buf = ds1307->regs;
 	u8			control, status;
 	int			ret;
@@ -649,11 +512,10 @@ static int ds1337_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 		t->enabled, t->pending);
 
 	/* read current status of both alarms and the chip */
-	ret = ds1307->read_block_data(client,
-			DS1339_REG_ALARM1_SECS, 9, buf);
-	if (ret != 9) {
+	ret = regmap_bulk_read(ds1307->regmap, DS1339_REG_ALARM1_SECS, buf, 9);
+	if (ret) {
 		dev_err(dev, "%s error %d\n", "alarm write", ret);
-		return -EIO;
+		return ret;
 	}
 	control = ds1307->regs[7];
 	status = ds1307->regs[8];
@@ -676,9 +538,8 @@ static int ds1337_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	buf[7] = control & ~(DS1337_BIT_A1IE | DS1337_BIT_A2IE);
 	buf[8] = status & ~(DS1337_BIT_A1I | DS1337_BIT_A2I);
 
-	ret = ds1307->write_block_data(client,
-			DS1339_REG_ALARM1_SECS, 9, buf);
-	if (ret < 0) {
+	ret = regmap_bulk_write(ds1307->regmap, DS1339_REG_ALARM1_SECS, buf, 9);
+	if (ret) {
 		dev_err(dev, "can't set alarm time\n");
 		return ret;
 	}
@@ -687,7 +548,7 @@ static int ds1337_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	if (t->enabled) {
 		dev_dbg(dev, "alarm IRQ armed\n");
 		buf[7] |= DS1337_BIT_A1IE;	/* only ALARM1 is used */
-		i2c_smbus_write_byte_data(client, DS1337_REG_CONTROL, buf[7]);
+		regmap_write(ds1307->regmap, DS1337_REG_CONTROL, buf[7]);
 	}
 
 	return 0;
@@ -695,27 +556,14 @@ static int ds1337_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 
 static int ds1307_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
-	struct i2c_client	*client = to_i2c_client(dev);
-	struct ds1307		*ds1307 = i2c_get_clientdata(client);
-	int			ret;
+	struct ds1307		*ds1307 = dev_get_drvdata(dev);
 
 	if (!test_bit(HAS_ALARM, &ds1307->flags))
 		return -ENOTTY;
 
-	ret = i2c_smbus_read_byte_data(client, DS1337_REG_CONTROL);
-	if (ret < 0)
-		return ret;
-
-	if (enabled)
-		ret |= DS1337_BIT_A1IE;
-	else
-		ret &= ~DS1337_BIT_A1IE;
-
-	ret = i2c_smbus_write_byte_data(client, DS1337_REG_CONTROL, ret);
-	if (ret < 0)
-		return ret;
-
-	return 0;
+	return regmap_update_bits(ds1307->regmap, DS1337_REG_CONTROL,
+				  DS1337_BIT_A1IE,
+				  enabled ? DS1337_BIT_A1IE : 0);
 }
 
 static const struct rtc_class_ops ds13xx_rtc_ops = {
@@ -729,6 +577,165 @@ static const struct rtc_class_ops ds13xx_rtc_ops = {
 /*----------------------------------------------------------------------*/
 
 /*
+ * Alarm support for rx8130 devices.
+ */
+
+#define RX8130_REG_ALARM_MIN		0x07
+#define RX8130_REG_ALARM_HOUR		0x08
+#define RX8130_REG_ALARM_WEEK_OR_DAY	0x09
+#define RX8130_REG_EXTENSION		0x0c
+#define RX8130_REG_EXTENSION_WADA	(1 << 3)
+#define RX8130_REG_FLAG			0x0d
+#define RX8130_REG_FLAG_AF		(1 << 3)
+#define RX8130_REG_CONTROL0		0x0e
+#define RX8130_REG_CONTROL0_AIE		(1 << 3)
+
+static irqreturn_t rx8130_irq(int irq, void *dev_id)
+{
+	struct ds1307           *ds1307 = dev_id;
+	struct mutex            *lock = &ds1307->rtc->ops_lock;
+	u8 ctl[3];
+	int ret;
+
+	mutex_lock(lock);
+
+	/* Read control registers. */
+	ret = regmap_bulk_read(ds1307->regmap, RX8130_REG_EXTENSION, ctl, 3);
+	if (ret < 0)
+		goto out;
+	if (!(ctl[1] & RX8130_REG_FLAG_AF))
+		goto out;
+	ctl[1] &= ~RX8130_REG_FLAG_AF;
+	ctl[2] &= ~RX8130_REG_CONTROL0_AIE;
+
+	ret = regmap_bulk_write(ds1307->regmap, RX8130_REG_EXTENSION, ctl, 3);
+	if (ret < 0)
+		goto out;
+
+	rtc_update_irq(ds1307->rtc, 1, RTC_AF | RTC_IRQF);
+
+out:
+	mutex_unlock(lock);
+
+	return IRQ_HANDLED;
+}
+
+static int rx8130_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+{
+	struct ds1307 *ds1307 = dev_get_drvdata(dev);
+	u8 ald[3], ctl[3];
+	int ret;
+
+	if (!test_bit(HAS_ALARM, &ds1307->flags))
+		return -EINVAL;
+
+	/* Read alarm registers. */
+	ret = regmap_bulk_read(ds1307->regmap, RX8130_REG_ALARM_MIN, ald, 3);
+	if (ret < 0)
+		return ret;
+
+	/* Read control registers. */
+	ret = regmap_bulk_read(ds1307->regmap, RX8130_REG_EXTENSION, ctl, 3);
+	if (ret < 0)
+		return ret;
+
+	t->enabled = !!(ctl[2] & RX8130_REG_CONTROL0_AIE);
+	t->pending = !!(ctl[1] & RX8130_REG_FLAG_AF);
+
+	/* Report alarm 0 time assuming 24-hour and day-of-month modes. */
+	t->time.tm_sec = -1;
+	t->time.tm_min = bcd2bin(ald[0] & 0x7f);
+	t->time.tm_hour = bcd2bin(ald[1] & 0x7f);
+	t->time.tm_wday = -1;
+	t->time.tm_mday = bcd2bin(ald[2] & 0x7f);
+	t->time.tm_mon = -1;
+	t->time.tm_year = -1;
+	t->time.tm_yday = -1;
+	t->time.tm_isdst = -1;
+
+	dev_dbg(dev, "%s, sec=%d min=%d hour=%d wday=%d mday=%d mon=%d enabled=%d\n",
+		__func__, t->time.tm_sec, t->time.tm_min, t->time.tm_hour,
+		t->time.tm_wday, t->time.tm_mday, t->time.tm_mon, t->enabled);
+
+	return 0;
+}
+
+static int rx8130_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+{
+	struct ds1307 *ds1307 = dev_get_drvdata(dev);
+	u8 ald[3], ctl[3];
+	int ret;
+
+	if (!test_bit(HAS_ALARM, &ds1307->flags))
+		return -EINVAL;
+
+	dev_dbg(dev, "%s, sec=%d min=%d hour=%d wday=%d mday=%d mon=%d "
+		"enabled=%d pending=%d\n", __func__,
+		t->time.tm_sec, t->time.tm_min, t->time.tm_hour,
+		t->time.tm_wday, t->time.tm_mday, t->time.tm_mon,
+		t->enabled, t->pending);
+
+	/* Read control registers. */
+	ret = regmap_bulk_read(ds1307->regmap, RX8130_REG_EXTENSION, ctl, 3);
+	if (ret < 0)
+		return ret;
+
+	ctl[0] &= ~RX8130_REG_EXTENSION_WADA;
+	ctl[1] |= RX8130_REG_FLAG_AF;
+	ctl[2] &= ~RX8130_REG_CONTROL0_AIE;
+
+	ret = regmap_bulk_write(ds1307->regmap, RX8130_REG_EXTENSION, ctl, 3);
+	if (ret < 0)
+		return ret;
+
+	/* Hardware alarm precision is 1 minute! */
+	ald[0] = bin2bcd(t->time.tm_min);
+	ald[1] = bin2bcd(t->time.tm_hour);
+	ald[2] = bin2bcd(t->time.tm_mday);
+
+	ret = regmap_bulk_write(ds1307->regmap, RX8130_REG_ALARM_MIN, ald, 3);
+	if (ret < 0)
+		return ret;
+
+	if (!t->enabled)
+		return 0;
+
+	ctl[2] |= RX8130_REG_CONTROL0_AIE;
+
+	return regmap_bulk_write(ds1307->regmap, RX8130_REG_EXTENSION, ctl, 3);
+}
+
+static int rx8130_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+	struct ds1307 *ds1307 = dev_get_drvdata(dev);
+	int ret, reg;
+
+	if (!test_bit(HAS_ALARM, &ds1307->flags))
+		return -EINVAL;
+
+	ret = regmap_read(ds1307->regmap, RX8130_REG_CONTROL0, &reg);
+	if (ret < 0)
+		return ret;
+
+	if (enabled)
+		reg |= RX8130_REG_CONTROL0_AIE;
+	else
+		reg &= ~RX8130_REG_CONTROL0_AIE;
+
+	return regmap_write(ds1307->regmap, RX8130_REG_CONTROL0, reg);
+}
+
+static const struct rtc_class_ops rx8130_rtc_ops = {
+	.read_time	= ds1307_get_time,
+	.set_time	= ds1307_set_time,
+	.read_alarm	= rx8130_read_alarm,
+	.set_alarm	= rx8130_set_alarm,
+	.alarm_irq_enable = rx8130_alarm_irq_enable,
+};
+
+/*----------------------------------------------------------------------*/
+
+/*
  * Alarm support for mcp794xx devices.
  */
 
@@ -752,31 +759,27 @@ static const struct rtc_class_ops ds13xx_rtc_ops = {
 
 static irqreturn_t mcp794xx_irq(int irq, void *dev_id)
 {
-	struct i2c_client       *client = dev_id;
-	struct ds1307           *ds1307 = i2c_get_clientdata(client);
+	struct ds1307           *ds1307 = dev_id;
 	struct mutex            *lock = &ds1307->rtc->ops_lock;
 	int reg, ret;
 
 	mutex_lock(lock);
 
 	/* Check and clear alarm 0 interrupt flag. */
-	reg = i2c_smbus_read_byte_data(client, MCP794XX_REG_ALARM0_CTRL);
-	if (reg < 0)
+	ret = regmap_read(ds1307->regmap, MCP794XX_REG_ALARM0_CTRL, &reg);
+	if (ret)
 		goto out;
 	if (!(reg & MCP794XX_BIT_ALMX_IF))
 		goto out;
 	reg &= ~MCP794XX_BIT_ALMX_IF;
-	ret = i2c_smbus_write_byte_data(client, MCP794XX_REG_ALARM0_CTRL, reg);
-	if (ret < 0)
+	ret = regmap_write(ds1307->regmap, MCP794XX_REG_ALARM0_CTRL, reg);
+	if (ret)
 		goto out;
 
 	/* Disable alarm 0. */
-	reg = i2c_smbus_read_byte_data(client, MCP794XX_REG_CONTROL);
-	if (reg < 0)
-		goto out;
-	reg &= ~MCP794XX_BIT_ALM0_EN;
-	ret = i2c_smbus_write_byte_data(client, MCP794XX_REG_CONTROL, reg);
-	if (ret < 0)
+	ret = regmap_update_bits(ds1307->regmap, MCP794XX_REG_CONTROL,
+				 MCP794XX_BIT_ALM0_EN, 0);
+	if (ret)
 		goto out;
 
 	rtc_update_irq(ds1307->rtc, 1, RTC_AF | RTC_IRQF);
@@ -789,8 +792,7 @@ static irqreturn_t mcp794xx_irq(int irq, void *dev_id)
 
 static int mcp794xx_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
-	struct i2c_client *client = to_i2c_client(dev);
-	struct ds1307 *ds1307 = i2c_get_clientdata(client);
+	struct ds1307 *ds1307 = dev_get_drvdata(dev);
 	u8 *regs = ds1307->regs;
 	int ret;
 
@@ -798,8 +800,8 @@ static int mcp794xx_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 		return -EINVAL;
 
 	/* Read control and alarm 0 registers. */
-	ret = ds1307->read_block_data(client, MCP794XX_REG_CONTROL, 10, regs);
-	if (ret < 0)
+	ret = regmap_bulk_read(ds1307->regmap, MCP794XX_REG_CONTROL, regs, 10);
+	if (ret)
 		return ret;
 
 	t->enabled = !!(regs[0] & MCP794XX_BIT_ALM0_EN);
@@ -828,8 +830,7 @@ static int mcp794xx_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 
 static int mcp794xx_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
-	struct i2c_client *client = to_i2c_client(dev);
-	struct ds1307 *ds1307 = i2c_get_clientdata(client);
+	struct ds1307 *ds1307 = dev_get_drvdata(dev);
 	unsigned char *regs = ds1307->regs;
 	int ret;
 
@@ -843,8 +844,8 @@ static int mcp794xx_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 		t->enabled, t->pending);
 
 	/* Read control and alarm 0 registers. */
-	ret = ds1307->read_block_data(client, MCP794XX_REG_CONTROL, 10, regs);
-	if (ret < 0)
+	ret = regmap_bulk_read(ds1307->regmap, MCP794XX_REG_CONTROL, regs, 10);
+	if (ret)
 		return ret;
 
 	/* Set alarm 0, using 24-hour and day-of-month modes. */
@@ -862,35 +863,26 @@ static int mcp794xx_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	/* Disable interrupt. We will not enable until completely programmed */
 	regs[0] &= ~MCP794XX_BIT_ALM0_EN;
 
-	ret = ds1307->write_block_data(client, MCP794XX_REG_CONTROL, 10, regs);
-	if (ret < 0)
+	ret = regmap_bulk_write(ds1307->regmap, MCP794XX_REG_CONTROL, regs, 10);
+	if (ret)
 		return ret;
 
 	if (!t->enabled)
 		return 0;
 	regs[0] |= MCP794XX_BIT_ALM0_EN;
-	return i2c_smbus_write_byte_data(client, MCP794XX_REG_CONTROL, regs[0]);
+	return regmap_write(ds1307->regmap, MCP794XX_REG_CONTROL, regs[0]);
 }
 
 static int mcp794xx_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
-	struct i2c_client *client = to_i2c_client(dev);
-	struct ds1307 *ds1307 = i2c_get_clientdata(client);
-	int reg;
+	struct ds1307 *ds1307 = dev_get_drvdata(dev);
 
 	if (!test_bit(HAS_ALARM, &ds1307->flags))
 		return -EINVAL;
 
-	reg = i2c_smbus_read_byte_data(client, MCP794XX_REG_CONTROL);
-	if (reg < 0)
-		return reg;
-
-	if (enabled)
-		reg |= MCP794XX_BIT_ALM0_EN;
-	else
-		reg &= ~MCP794XX_BIT_ALM0_EN;
-
-	return i2c_smbus_write_byte_data(client, MCP794XX_REG_CONTROL, reg);
+	return regmap_update_bits(ds1307->regmap, MCP794XX_REG_CONTROL,
+				  MCP794XX_BIT_ALM0_EN,
+				  enabled ? MCP794XX_BIT_ALM0_EN : 0);
 }
 
 static const struct rtc_class_ops mcp794xx_rtc_ops = {
@@ -903,50 +895,27 @@ static const struct rtc_class_ops mcp794xx_rtc_ops = {
 
 /*----------------------------------------------------------------------*/
 
-static ssize_t
-ds1307_nvram_read(struct file *filp, struct kobject *kobj,
-		struct bin_attribute *attr,
-		char *buf, loff_t off, size_t count)
+static int ds1307_nvram_read(void *priv, unsigned int offset, void *val,
+			     size_t bytes)
 {
-	struct i2c_client	*client;
-	struct ds1307		*ds1307;
-	int			result;
+	struct ds1307 *ds1307 = priv;
 
-	client = kobj_to_i2c_client(kobj);
-	ds1307 = i2c_get_clientdata(client);
-
-	result = ds1307->read_block_data(client, ds1307->nvram_offset + off,
-								count, buf);
-	if (result < 0)
-		dev_err(&client->dev, "%s error %d\n", "nvram read", result);
-	return result;
+	return regmap_bulk_read(ds1307->regmap, ds1307->nvram_offset + offset,
+				val, bytes);
 }
 
-static ssize_t
-ds1307_nvram_write(struct file *filp, struct kobject *kobj,
-		struct bin_attribute *attr,
-		char *buf, loff_t off, size_t count)
+static int ds1307_nvram_write(void *priv, unsigned int offset, void *val,
+			      size_t bytes)
 {
-	struct i2c_client	*client;
-	struct ds1307		*ds1307;
-	int			result;
+	struct ds1307 *ds1307 = priv;
 
-	client = kobj_to_i2c_client(kobj);
-	ds1307 = i2c_get_clientdata(client);
-
-	result = ds1307->write_block_data(client, ds1307->nvram_offset + off,
-								count, buf);
-	if (result < 0) {
-		dev_err(&client->dev, "%s error %d\n", "nvram write", result);
-		return result;
-	}
-	return count;
+	return regmap_bulk_write(ds1307->regmap, ds1307->nvram_offset + offset,
+				 val, bytes);
 }
 
-
 /*----------------------------------------------------------------------*/
 
-static u8 do_trickle_setup_ds1339(struct i2c_client *client,
+static u8 do_trickle_setup_ds1339(struct ds1307 *ds1307,
 				  uint32_t ohms, bool diode)
 {
 	u8 setup = (diode) ? DS1307_TRICKLE_CHARGER_DIODE :
@@ -963,14 +932,14 @@ static u8 do_trickle_setup_ds1339(struct i2c_client *client,
 		setup |= DS1307_TRICKLE_CHARGER_4K_OHM;
 		break;
 	default:
-		dev_warn(&client->dev,
+		dev_warn(ds1307->dev,
 			 "Unsupported ohm value %u in dt\n", ohms);
 		return 0;
 	}
 	return setup;
 }
 
-static void ds1307_trickle_init(struct i2c_client *client,
+static void ds1307_trickle_init(struct ds1307 *ds1307,
 				struct chip_desc *chip)
 {
 	uint32_t ohms = 0;
@@ -978,11 +947,12 @@ static void ds1307_trickle_init(struct i2c_client *client,
 
 	if (!chip->do_trickle_setup)
 		goto out;
-	if (device_property_read_u32(&client->dev, "trickle-resistor-ohms", &ohms))
+	if (device_property_read_u32(ds1307->dev, "trickle-resistor-ohms",
+				     &ohms))
 		goto out;
-	if (device_property_read_bool(&client->dev, "trickle-diode-disable"))
+	if (device_property_read_bool(ds1307->dev, "trickle-diode-disable"))
 		diode = false;
-	chip->trickle_charger_setup = chip->do_trickle_setup(client,
+	chip->trickle_charger_setup = chip->do_trickle_setup(ds1307,
 							     ohms, diode);
 out:
 	return;
@@ -1009,13 +979,10 @@ static int ds3231_hwmon_read_temp(struct device *dev, s32 *mC)
 	s16 temp;
 	int ret;
 
-	ret = ds1307->read_block_data(ds1307->client, DS3231_REG_TEMPERATURE,
-					sizeof(temp_buf), temp_buf);
-	if (ret < 0)
+	ret = regmap_bulk_read(ds1307->regmap, DS3231_REG_TEMPERATURE,
+			       temp_buf, sizeof(temp_buf));
+	if (ret)
 		return ret;
-	if (ret != sizeof(temp_buf))
-		return -EIO;
-
 	/*
 	 * Temperature is represented as a 10-bit code with a resolution of
 	 * 0.25 degree celsius and encoded in two's complement format.
@@ -1055,12 +1022,11 @@ static void ds1307_hwmon_register(struct ds1307 *ds1307)
 	if (ds1307->type != ds_3231)
 		return;
 
-	dev = devm_hwmon_device_register_with_groups(&ds1307->client->dev,
-						ds1307->client->name,
+	dev = devm_hwmon_device_register_with_groups(ds1307->dev, ds1307->name,
 						ds1307, ds3231_hwmon_groups);
 	if (IS_ERR(dev)) {
-		dev_warn(&ds1307->client->dev,
-			"unable to register hwmon device %ld\n", PTR_ERR(dev));
+		dev_warn(ds1307->dev, "unable to register hwmon device %ld\n",
+			 PTR_ERR(dev));
 	}
 }
 
@@ -1099,24 +1065,12 @@ static int ds3231_clk_sqw_rates[] = {
 
 static int ds1337_write_control(struct ds1307 *ds1307, u8 mask, u8 value)
 {
-	struct i2c_client *client = ds1307->client;
 	struct mutex *lock = &ds1307->rtc->ops_lock;
-	int control;
 	int ret;
 
 	mutex_lock(lock);
-
-	control = i2c_smbus_read_byte_data(client, DS1337_REG_CONTROL);
-	if (control < 0) {
-		ret = control;
-		goto out;
-	}
-
-	control &= ~mask;
-	control |= value;
-
-	ret = i2c_smbus_write_byte_data(client, DS1337_REG_CONTROL, control);
-out:
+	ret = regmap_update_bits(ds1307->regmap, DS1337_REG_CONTROL,
+				 mask, value);
 	mutex_unlock(lock);
 
 	return ret;
@@ -1126,12 +1080,12 @@ static unsigned long ds3231_clk_sqw_recalc_rate(struct clk_hw *hw,
 						unsigned long parent_rate)
 {
 	struct ds1307 *ds1307 = clk_sqw_to_ds1307(hw);
-	int control;
+	int control, ret;
 	int rate_sel = 0;
 
-	control = i2c_smbus_read_byte_data(ds1307->client, DS1337_REG_CONTROL);
-	if (control < 0)
-		return control;
+	ret = regmap_read(ds1307->regmap, DS1337_REG_CONTROL, &control);
+	if (ret)
+		return ret;
 	if (control & DS1337_BIT_RS1)
 		rate_sel += 1;
 	if (control & DS1337_BIT_RS2)
@@ -1195,11 +1149,11 @@ static void ds3231_clk_sqw_unprepare(struct clk_hw *hw)
 static int ds3231_clk_sqw_is_prepared(struct clk_hw *hw)
 {
 	struct ds1307 *ds1307 = clk_sqw_to_ds1307(hw);
-	int control;
+	int control, ret;
 
-	control = i2c_smbus_read_byte_data(ds1307->client, DS1337_REG_CONTROL);
-	if (control < 0)
-		return control;
+	ret = regmap_read(ds1307->regmap, DS1337_REG_CONTROL, &control);
+	if (ret)
+		return ret;
 
 	return !(control & DS1337_BIT_INTCN);
 }
@@ -1221,26 +1175,13 @@ static unsigned long ds3231_clk_32khz_recalc_rate(struct clk_hw *hw,
 
 static int ds3231_clk_32khz_control(struct ds1307 *ds1307, bool enable)
 {
-	struct i2c_client *client = ds1307->client;
 	struct mutex *lock = &ds1307->rtc->ops_lock;
-	int status;
 	int ret;
 
 	mutex_lock(lock);
-
-	status = i2c_smbus_read_byte_data(client, DS1337_REG_STATUS);
-	if (status < 0) {
-		ret = status;
-		goto out;
-	}
-
-	if (enable)
-		status |= DS3231_BIT_EN32KHZ;
-	else
-		status &= ~DS3231_BIT_EN32KHZ;
-
-	ret = i2c_smbus_write_byte_data(client, DS1337_REG_STATUS, status);
-out:
+	ret = regmap_update_bits(ds1307->regmap, DS1337_REG_STATUS,
+				 DS3231_BIT_EN32KHZ,
+				 enable ? DS3231_BIT_EN32KHZ : 0);
 	mutex_unlock(lock);
 
 	return ret;
@@ -1263,11 +1204,11 @@ static void ds3231_clk_32khz_unprepare(struct clk_hw *hw)
 static int ds3231_clk_32khz_is_prepared(struct clk_hw *hw)
 {
 	struct ds1307 *ds1307 = clk_32khz_to_ds1307(hw);
-	int status;
+	int status, ret;
 
-	status = i2c_smbus_read_byte_data(ds1307->client, DS1337_REG_STATUS);
-	if (status < 0)
-		return status;
+	ret = regmap_read(ds1307->regmap, DS1337_REG_STATUS, &status);
+	if (ret)
+		return ret;
 
 	return !!(status & DS3231_BIT_EN32KHZ);
 }
@@ -1292,18 +1233,17 @@ static struct clk_init_data ds3231_clks_init[] = {
 
 static int ds3231_clks_register(struct ds1307 *ds1307)
 {
-	struct i2c_client *client = ds1307->client;
-	struct device_node *node = client->dev.of_node;
+	struct device_node *node = ds1307->dev->of_node;
 	struct clk_onecell_data	*onecell;
 	int i;
 
-	onecell = devm_kzalloc(&client->dev, sizeof(*onecell), GFP_KERNEL);
+	onecell = devm_kzalloc(ds1307->dev, sizeof(*onecell), GFP_KERNEL);
 	if (!onecell)
 		return -ENOMEM;
 
 	onecell->clk_num = ARRAY_SIZE(ds3231_clks_init);
-	onecell->clks = devm_kcalloc(&client->dev, onecell->clk_num,
-					sizeof(onecell->clks[0]), GFP_KERNEL);
+	onecell->clks = devm_kcalloc(ds1307->dev, onecell->clk_num,
+				     sizeof(onecell->clks[0]), GFP_KERNEL);
 	if (!onecell->clks)
 		return -ENOMEM;
 
@@ -1322,8 +1262,8 @@ static int ds3231_clks_register(struct ds1307 *ds1307)
 						&init.name);
 		ds1307->clks[i].init = &init;
 
-		onecell->clks[i] = devm_clk_register(&client->dev,
-							&ds1307->clks[i]);
+		onecell->clks[i] = devm_clk_register(ds1307->dev,
+						     &ds1307->clks[i]);
 		if (IS_ERR(onecell->clks[i]))
 			return PTR_ERR(onecell->clks[i]);
 	}
@@ -1345,8 +1285,8 @@ static void ds1307_clks_register(struct ds1307 *ds1307)
 
 	ret = ds3231_clks_register(ds1307);
 	if (ret) {
-		dev_warn(&ds1307->client->dev,
-			"unable to register clock device %d\n", ret);
+		dev_warn(ds1307->dev, "unable to register clock device %d\n",
+			 ret);
 	}
 }
 
@@ -1358,6 +1298,12 @@ static void ds1307_clks_register(struct ds1307 *ds1307)
 
 #endif /* CONFIG_COMMON_CLK */
 
+static const struct regmap_config regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = 0x12,
+};
+
 static int ds1307_probe(struct i2c_client *client,
 			const struct i2c_device_id *id)
 {
@@ -1365,7 +1311,6 @@ static int ds1307_probe(struct i2c_client *client,
 	int			err = -ENODEV;
 	int			tmp, wday;
 	struct chip_desc	*chip;
-	struct i2c_adapter	*adapter = to_i2c_adapter(client->dev.parent);
 	bool			want_irq = false;
 	bool			ds1307_can_wakeup_device = false;
 	unsigned char		*buf;
@@ -1382,17 +1327,22 @@ static int ds1307_probe(struct i2c_client *client,
 	};
 	const struct rtc_class_ops *rtc_ops = &ds13xx_rtc_ops;
 
-	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)
-	    && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK))
-		return -EIO;
-
 	ds1307 = devm_kzalloc(&client->dev, sizeof(struct ds1307), GFP_KERNEL);
 	if (!ds1307)
 		return -ENOMEM;
 
-	i2c_set_clientdata(client, ds1307);
+	dev_set_drvdata(&client->dev, ds1307);
+	ds1307->dev = &client->dev;
+	ds1307->name = client->name;
+	ds1307->irq = client->irq;
 
-	ds1307->client	= client;
+	ds1307->regmap = devm_regmap_init_i2c(client, &regmap_config);
+	if (IS_ERR(ds1307->regmap)) {
+		dev_err(ds1307->dev, "regmap allocation failed\n");
+		return PTR_ERR(ds1307->regmap);
+	}
+
+	i2c_set_clientdata(client, ds1307);
 
 	if (client->dev.of_node) {
 		ds1307->type = (enum ds_type)
@@ -1405,7 +1355,7 @@ static int ds1307_probe(struct i2c_client *client,
 		const struct acpi_device_id *acpi_id;
 
 		acpi_id = acpi_match_device(ACPI_PTR(ds1307_acpi_ids),
-					    &client->dev);
+					    ds1307->dev);
 		if (!acpi_id)
 			return -ENODEV;
 		chip = &chips[acpi_id->driver_data];
@@ -1413,27 +1363,21 @@ static int ds1307_probe(struct i2c_client *client,
 	}
 
 	if (!pdata)
-		ds1307_trickle_init(client, chip);
+		ds1307_trickle_init(ds1307, chip);
 	else if (pdata->trickle_charger_setup)
 		chip->trickle_charger_setup = pdata->trickle_charger_setup;
 
 	if (chip->trickle_charger_setup && chip->trickle_charger_reg) {
-		dev_dbg(&client->dev, "writing trickle charger info 0x%x to 0x%x\n",
+		dev_dbg(ds1307->dev,
+			"writing trickle charger info 0x%x to 0x%x\n",
 		    DS13XX_TRICKLE_CHARGER_MAGIC | chip->trickle_charger_setup,
 		    chip->trickle_charger_reg);
-		i2c_smbus_write_byte_data(client, chip->trickle_charger_reg,
+		regmap_write(ds1307->regmap, chip->trickle_charger_reg,
 		    DS13XX_TRICKLE_CHARGER_MAGIC |
 		    chip->trickle_charger_setup);
 	}
 
 	buf = ds1307->regs;
-	if (i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) {
-		ds1307->read_block_data = ds1307_native_smbus_read_block_data;
-		ds1307->write_block_data = ds1307_native_smbus_write_block_data;
-	} else {
-		ds1307->read_block_data = ds1307_read_block_data;
-		ds1307->write_block_data = ds1307_write_block_data;
-	}
 
 #ifdef CONFIG_OF
 /*
@@ -1459,11 +1403,10 @@ static int ds1307_probe(struct i2c_client *client,
 	case ds_1339:
 	case ds_3231:
 		/* get registers that the "rtc" read below won't read... */
-		tmp = ds1307->read_block_data(ds1307->client,
-				DS1337_REG_CONTROL, 2, buf);
-		if (tmp != 2) {
-			dev_dbg(&client->dev, "read error %d\n", tmp);
-			err = -EIO;
+		err = regmap_bulk_read(ds1307->regmap, DS1337_REG_CONTROL,
+				       buf, 2);
+		if (err) {
+			dev_dbg(ds1307->dev, "read error %d\n", err);
 			goto exit;
 		}
 
@@ -1477,8 +1420,8 @@ static int ds1307_probe(struct i2c_client *client,
 		 * For some variants, be sure alarms can trigger when we're
 		 * running on Vbackup (BBSQI/BBSQW)
 		 */
-		if (chip->alarm && (ds1307->client->irq > 0 ||
-						ds1307_can_wakeup_device)) {
+		if (chip->alarm && (ds1307->irq > 0 ||
+				    ds1307_can_wakeup_device)) {
 			ds1307->regs[0] |= DS1337_BIT_INTCN
 					| bbsqi_bitpos[ds1307->type];
 			ds1307->regs[0] &= ~(DS1337_BIT_A2IE | DS1337_BIT_A1IE);
@@ -1486,50 +1429,49 @@ static int ds1307_probe(struct i2c_client *client,
 			want_irq = true;
 		}
 
-		i2c_smbus_write_byte_data(client, DS1337_REG_CONTROL,
-							ds1307->regs[0]);
+		regmap_write(ds1307->regmap, DS1337_REG_CONTROL,
+			     ds1307->regs[0]);
 
 		/* oscillator fault?  clear flag, and warn */
 		if (ds1307->regs[1] & DS1337_BIT_OSF) {
-			i2c_smbus_write_byte_data(client, DS1337_REG_STATUS,
-				ds1307->regs[1] & ~DS1337_BIT_OSF);
-			dev_warn(&client->dev, "SET TIME!\n");
+			regmap_write(ds1307->regmap, DS1337_REG_STATUS,
+				     ds1307->regs[1] & ~DS1337_BIT_OSF);
+			dev_warn(ds1307->dev, "SET TIME!\n");
 		}
 		break;
 
 	case rx_8025:
-		tmp = i2c_smbus_read_i2c_block_data(ds1307->client,
-				RX8025_REG_CTRL1 << 4 | 0x08, 2, buf);
-		if (tmp != 2) {
-			dev_dbg(&client->dev, "read error %d\n", tmp);
-			err = -EIO;
+		err = regmap_bulk_read(ds1307->regmap,
+				       RX8025_REG_CTRL1 << 4 | 0x08, buf, 2);
+		if (err) {
+			dev_dbg(ds1307->dev, "read error %d\n", err);
 			goto exit;
 		}
 
 		/* oscillator off?  turn it on, so clock can tick. */
 		if (!(ds1307->regs[1] & RX8025_BIT_XST)) {
 			ds1307->regs[1] |= RX8025_BIT_XST;
-			i2c_smbus_write_byte_data(client,
-						  RX8025_REG_CTRL2 << 4 | 0x08,
-						  ds1307->regs[1]);
-			dev_warn(&client->dev,
+			regmap_write(ds1307->regmap,
+				     RX8025_REG_CTRL2 << 4 | 0x08,
+				     ds1307->regs[1]);
+			dev_warn(ds1307->dev,
 				 "oscillator stop detected - SET TIME!\n");
 		}
 
 		if (ds1307->regs[1] & RX8025_BIT_PON) {
 			ds1307->regs[1] &= ~RX8025_BIT_PON;
-			i2c_smbus_write_byte_data(client,
-						  RX8025_REG_CTRL2 << 4 | 0x08,
-						  ds1307->regs[1]);
-			dev_warn(&client->dev, "power-on detected\n");
+			regmap_write(ds1307->regmap,
+				     RX8025_REG_CTRL2 << 4 | 0x08,
+				     ds1307->regs[1]);
+			dev_warn(ds1307->dev, "power-on detected\n");
 		}
 
 		if (ds1307->regs[1] & RX8025_BIT_VDET) {
 			ds1307->regs[1] &= ~RX8025_BIT_VDET;
-			i2c_smbus_write_byte_data(client,
-						  RX8025_REG_CTRL2 << 4 | 0x08,
-						  ds1307->regs[1]);
-			dev_warn(&client->dev, "voltage drop detected\n");
+			regmap_write(ds1307->regmap,
+				     RX8025_REG_CTRL2 << 4 | 0x08,
+				     ds1307->regs[1]);
+			dev_warn(ds1307->dev, "voltage drop detected\n");
 		}
 
 		/* make sure we are running in 24hour mode */
@@ -1537,16 +1479,15 @@ static int ds1307_probe(struct i2c_client *client,
 			u8 hour;
 
 			/* switch to 24 hour mode */
-			i2c_smbus_write_byte_data(client,
-						  RX8025_REG_CTRL1 << 4 | 0x08,
-						  ds1307->regs[0] |
-						  RX8025_BIT_2412);
+			regmap_write(ds1307->regmap,
+				     RX8025_REG_CTRL1 << 4 | 0x08,
+				     ds1307->regs[0] | RX8025_BIT_2412);
 
-			tmp = i2c_smbus_read_i2c_block_data(ds1307->client,
-					RX8025_REG_CTRL1 << 4 | 0x08, 2, buf);
-			if (tmp != 2) {
-				dev_dbg(&client->dev, "read error %d\n", tmp);
-				err = -EIO;
+			err = regmap_bulk_read(ds1307->regmap,
+					       RX8025_REG_CTRL1 << 4 | 0x08,
+					       buf, 2);
+			if (err) {
+				dev_dbg(ds1307->dev, "read error %d\n", err);
 				goto exit;
 			}
 
@@ -1557,9 +1498,16 @@ static int ds1307_probe(struct i2c_client *client,
 			if (ds1307->regs[DS1307_REG_HOUR] & DS1307_BIT_PM)
 				hour += 12;
 
-			i2c_smbus_write_byte_data(client,
-						  DS1307_REG_HOUR << 4 | 0x08,
-						  hour);
+			regmap_write(ds1307->regmap,
+				     DS1307_REG_HOUR << 4 | 0x08, hour);
+		}
+		break;
+	case rx_8130:
+		ds1307->offset = 0x10; /* Seconds starts at 0x10 */
+		rtc_ops = &rx8130_rtc_ops;
+		if (chip->alarm && ds1307->irq > 0) {
+			irq_handler = rx8130_irq;
+			want_irq = true;
 		}
 		break;
 	case ds_1388:
@@ -1567,7 +1515,8 @@ static int ds1307_probe(struct i2c_client *client,
 		break;
 	case mcp794xx:
 		rtc_ops = &mcp794xx_rtc_ops;
-		if (ds1307->client->irq > 0 && chip->alarm) {
+		if (chip->alarm && (ds1307->irq > 0 ||
+				    ds1307_can_wakeup_device)) {
 			irq_handler = mcp794xx_irq;
 			want_irq = true;
 		}
@@ -1578,10 +1527,9 @@ static int ds1307_probe(struct i2c_client *client,
 
 read_rtc:
 	/* read RTC registers */
-	tmp = ds1307->read_block_data(ds1307->client, ds1307->offset, 8, buf);
-	if (tmp != 8) {
-		dev_dbg(&client->dev, "read error %d\n", tmp);
-		err = -EIO;
+	err = regmap_bulk_read(ds1307->regmap, ds1307->offset, buf, 8);
+	if (err) {
+		dev_dbg(ds1307->dev, "read error %d\n", err);
 		goto exit;
 	}
 
@@ -1597,56 +1545,56 @@ static int ds1307_probe(struct i2c_client *client,
 	case m41t00:
 		/* clock halted?  turn it on, so clock can tick. */
 		if (tmp & DS1307_BIT_CH) {
-			i2c_smbus_write_byte_data(client, DS1307_REG_SECS, 0);
-			dev_warn(&client->dev, "SET TIME!\n");
+			regmap_write(ds1307->regmap, DS1307_REG_SECS, 0);
+			dev_warn(ds1307->dev, "SET TIME!\n");
 			goto read_rtc;
 		}
 		break;
+	case ds_1308:
 	case ds_1338:
 		/* clock halted?  turn it on, so clock can tick. */
 		if (tmp & DS1307_BIT_CH)
-			i2c_smbus_write_byte_data(client, DS1307_REG_SECS, 0);
+			regmap_write(ds1307->regmap, DS1307_REG_SECS, 0);
 
 		/* oscillator fault?  clear flag, and warn */
 		if (ds1307->regs[DS1307_REG_CONTROL] & DS1338_BIT_OSF) {
-			i2c_smbus_write_byte_data(client, DS1307_REG_CONTROL,
-					ds1307->regs[DS1307_REG_CONTROL]
-					& ~DS1338_BIT_OSF);
-			dev_warn(&client->dev, "SET TIME!\n");
+			regmap_write(ds1307->regmap, DS1307_REG_CONTROL,
+					ds1307->regs[DS1307_REG_CONTROL] &
+					~DS1338_BIT_OSF);
+			dev_warn(ds1307->dev, "SET TIME!\n");
 			goto read_rtc;
 		}
 		break;
 	case ds_1340:
 		/* clock halted?  turn it on, so clock can tick. */
 		if (tmp & DS1340_BIT_nEOSC)
-			i2c_smbus_write_byte_data(client, DS1307_REG_SECS, 0);
+			regmap_write(ds1307->regmap, DS1307_REG_SECS, 0);
 
-		tmp = i2c_smbus_read_byte_data(client, DS1340_REG_FLAG);
-		if (tmp < 0) {
-			dev_dbg(&client->dev, "read error %d\n", tmp);
-			err = -EIO;
+		err = regmap_read(ds1307->regmap, DS1340_REG_FLAG, &tmp);
+		if (err) {
+			dev_dbg(ds1307->dev, "read error %d\n", err);
 			goto exit;
 		}
 
 		/* oscillator fault?  clear flag, and warn */
 		if (tmp & DS1340_BIT_OSF) {
-			i2c_smbus_write_byte_data(client, DS1340_REG_FLAG, 0);
-			dev_warn(&client->dev, "SET TIME!\n");
+			regmap_write(ds1307->regmap, DS1340_REG_FLAG, 0);
+			dev_warn(ds1307->dev, "SET TIME!\n");
 		}
 		break;
 	case mcp794xx:
 		/* make sure that the backup battery is enabled */
 		if (!(ds1307->regs[DS1307_REG_WDAY] & MCP794XX_BIT_VBATEN)) {
-			i2c_smbus_write_byte_data(client, DS1307_REG_WDAY,
-					ds1307->regs[DS1307_REG_WDAY]
-					| MCP794XX_BIT_VBATEN);
+			regmap_write(ds1307->regmap, DS1307_REG_WDAY,
+				     ds1307->regs[DS1307_REG_WDAY] |
+				     MCP794XX_BIT_VBATEN);
 		}
 
 		/* clock halted?  turn it on, so clock can tick. */
 		if (!(tmp & MCP794XX_BIT_ST)) {
-			i2c_smbus_write_byte_data(client, DS1307_REG_SECS,
-					MCP794XX_BIT_ST);
-			dev_warn(&client->dev, "SET TIME!\n");
+			regmap_write(ds1307->regmap, DS1307_REG_SECS,
+				     MCP794XX_BIT_ST);
+			dev_warn(ds1307->dev, "SET TIME!\n");
 			goto read_rtc;
 		}
 
@@ -1680,16 +1628,15 @@ static int ds1307_probe(struct i2c_client *client,
 			tmp = 0;
 		if (ds1307->regs[DS1307_REG_HOUR] & DS1307_BIT_PM)
 			tmp += 12;
-		i2c_smbus_write_byte_data(client,
-				ds1307->offset + DS1307_REG_HOUR,
-				bin2bcd(tmp));
+		regmap_write(ds1307->regmap, ds1307->offset + DS1307_REG_HOUR,
+			     bin2bcd(tmp));
 	}
 
 	/*
 	 * Some IPs have weekday reset value = 0x1 which might not correct
 	 * hence compute the wday using the current date/month/year values
 	 */
-	ds1307_get_time(&client->dev, &tm);
+	ds1307_get_time(ds1307->dev, &tm);
 	wday = tm.tm_wday;
 	timestamp = rtc_tm_to_time64(&tm);
 	rtc_time64_to_tm(timestamp, &tm);
@@ -1699,78 +1646,63 @@ static int ds1307_probe(struct i2c_client *client,
 	 * If different then set the wday which we computed using
 	 * timestamp
 	 */
-	if (wday != tm.tm_wday) {
-		wday = i2c_smbus_read_byte_data(client, MCP794XX_REG_WEEKDAY);
-		wday = wday & ~MCP794XX_REG_WEEKDAY_WDAY_MASK;
-		wday = wday | (tm.tm_wday + 1);
-		i2c_smbus_write_byte_data(client, MCP794XX_REG_WEEKDAY, wday);
-	}
+	if (wday != tm.tm_wday)
+		regmap_update_bits(ds1307->regmap, MCP794XX_REG_WEEKDAY,
+				   MCP794XX_REG_WEEKDAY_WDAY_MASK,
+				   tm.tm_wday + 1);
 
 	if (want_irq) {
-		device_set_wakeup_capable(&client->dev, true);
+		device_set_wakeup_capable(ds1307->dev, true);
 		set_bit(HAS_ALARM, &ds1307->flags);
 	}
-	ds1307->rtc = devm_rtc_device_register(&client->dev, client->name,
-				rtc_ops, THIS_MODULE);
+
+	ds1307->rtc = devm_rtc_allocate_device(ds1307->dev);
 	if (IS_ERR(ds1307->rtc)) {
 		return PTR_ERR(ds1307->rtc);
 	}
 
-	if (ds1307_can_wakeup_device && ds1307->client->irq <= 0) {
+	if (ds1307_can_wakeup_device && ds1307->irq <= 0) {
 		/* Disable request for an IRQ */
 		want_irq = false;
-		dev_info(&client->dev, "'wakeup-source' is set, request for an IRQ is disabled!\n");
+		dev_info(ds1307->dev,
+			 "'wakeup-source' is set, request for an IRQ is disabled!\n");
 		/* We cannot support UIE mode if we do not have an IRQ line */
 		ds1307->rtc->uie_unsupported = 1;
 	}
 
 	if (want_irq) {
-		err = devm_request_threaded_irq(&client->dev,
-						client->irq, NULL, irq_handler,
+		err = devm_request_threaded_irq(ds1307->dev,
+						ds1307->irq, NULL, irq_handler,
 						IRQF_SHARED | IRQF_ONESHOT,
-						ds1307->rtc->name, client);
+						ds1307->name, ds1307);
 		if (err) {
 			client->irq = 0;
-			device_set_wakeup_capable(&client->dev, false);
+			device_set_wakeup_capable(ds1307->dev, false);
 			clear_bit(HAS_ALARM, &ds1307->flags);
-			dev_err(&client->dev, "unable to request IRQ!\n");
+			dev_err(ds1307->dev, "unable to request IRQ!\n");
 		} else
-			dev_dbg(&client->dev, "got IRQ %d\n", client->irq);
+			dev_dbg(ds1307->dev, "got IRQ %d\n", client->irq);
 	}
 
 	if (chip->nvram_size) {
+		ds1307->nvmem_cfg.name = "ds1307_nvram";
+		ds1307->nvmem_cfg.word_size = 1;
+		ds1307->nvmem_cfg.stride = 1;
+		ds1307->nvmem_cfg.size = chip->nvram_size;
+		ds1307->nvmem_cfg.reg_read = ds1307_nvram_read;
+		ds1307->nvmem_cfg.reg_write = ds1307_nvram_write;
+		ds1307->nvmem_cfg.priv = ds1307;
+		ds1307->nvram_offset = chip->nvram_offset;
 
-		ds1307->nvram = devm_kzalloc(&client->dev,
-					sizeof(struct bin_attribute),
-					GFP_KERNEL);
-		if (!ds1307->nvram) {
-			dev_err(&client->dev, "cannot allocate memory for nvram sysfs\n");
-		} else {
-
-			ds1307->nvram->attr.name = "nvram";
-			ds1307->nvram->attr.mode = S_IRUGO | S_IWUSR;
-
-			sysfs_bin_attr_init(ds1307->nvram);
-
-			ds1307->nvram->read = ds1307_nvram_read;
-			ds1307->nvram->write = ds1307_nvram_write;
-			ds1307->nvram->size = chip->nvram_size;
-			ds1307->nvram_offset = chip->nvram_offset;
-
-			err = sysfs_create_bin_file(&client->dev.kobj,
-						    ds1307->nvram);
-			if (err) {
-				dev_err(&client->dev,
-					"unable to create sysfs file: %s\n",
-					ds1307->nvram->attr.name);
-			} else {
-				set_bit(HAS_NVRAM, &ds1307->flags);
-				dev_info(&client->dev, "%zu bytes nvram\n",
-					 ds1307->nvram->size);
-			}
-		}
+		ds1307->rtc->nvmem_config = &ds1307->nvmem_cfg;
+		ds1307->rtc->nvram_old_abi = true;
 	}
 
+	ds1307->rtc->ops = rtc_ops;
+	err = rtc_register_device(ds1307->rtc);
+	if (err)
+		return err;
+
 	ds1307_hwmon_register(ds1307);
 	ds1307_clks_register(ds1307);
 
@@ -1780,16 +1712,6 @@ static int ds1307_probe(struct i2c_client *client,
 	return err;
 }
 
-static int ds1307_remove(struct i2c_client *client)
-{
-	struct ds1307 *ds1307 = i2c_get_clientdata(client);
-
-	if (test_and_clear_bit(HAS_NVRAM, &ds1307->flags))
-		sysfs_remove_bin_file(&client->dev.kobj, ds1307->nvram);
-
-	return 0;
-}
-
 static struct i2c_driver ds1307_driver = {
 	.driver = {
 		.name	= "rtc-ds1307",
@@ -1797,7 +1719,6 @@ static struct i2c_driver ds1307_driver = {
 		.acpi_match_table = ACPI_PTR(ds1307_acpi_ids),
 	},
 	.probe		= ds1307_probe,
-	.remove		= ds1307_remove,
 	.id_table	= ds1307_id,
 };
 

diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c
index deff431..0550f7b 100644
--- a/drivers/rtc/rtc-ds3232.c
+++ b/drivers/rtc/rtc-ds3232.c

@@ -22,6 +22,7 @@
 #include <linux/bcd.h>
 #include <linux/slab.h>
 #include <linux/regmap.h>
+#include <linux/hwmon.h>
 
 #define DS3232_REG_SECONDS      0x00
 #define DS3232_REG_MINUTES      0x01
@@ -46,6 +47,8 @@
 #       define DS3232_REG_SR_A2F     0x02
 #       define DS3232_REG_SR_A1F     0x01
 
+#define DS3232_REG_TEMPERATURE	0x11
+
 struct ds3232 {
 	struct device *dev;
 	struct regmap *regmap;
@@ -275,6 +278,120 @@ static int ds3232_update_alarm(struct device *dev, unsigned int enabled)
 	return ret;
 }
 
+/*
+ * Temperature sensor support for ds3232/ds3234 devices.
+ * A user-initiated temperature conversion is not started by this function,
+ * so the temperature is updated once every 64 seconds.
+ */
+static int ds3232_hwmon_read_temp(struct device *dev, long int *mC)
+{
+	struct ds3232 *ds3232 = dev_get_drvdata(dev);
+	u8 temp_buf[2];
+	s16 temp;
+	int ret;
+
+	ret = regmap_bulk_read(ds3232->regmap, DS3232_REG_TEMPERATURE, temp_buf,
+			       sizeof(temp_buf));
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Temperature is represented as a 10-bit code with a resolution of
+	 * 0.25 degree celsius and encoded in two's complement format.
+	 */
+	temp = (temp_buf[0] << 8) | temp_buf[1];
+	temp >>= 6;
+	*mC = temp * 250;
+
+	return 0;
+}
+
+static umode_t ds3232_hwmon_is_visible(const void *data,
+				       enum hwmon_sensor_types type,
+				       u32 attr, int channel)
+{
+	if (type != hwmon_temp)
+		return 0;
+
+	switch (attr) {
+	case hwmon_temp_input:
+		return 0444;
+	default:
+		return 0;
+	}
+}
+
+static int ds3232_hwmon_read(struct device *dev,
+			     enum hwmon_sensor_types type,
+			     u32 attr, int channel, long *temp)
+{
+	int err;
+
+	switch (attr) {
+	case hwmon_temp_input:
+		err = ds3232_hwmon_read_temp(dev, temp);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+static u32 ds3232_hwmon_chip_config[] = {
+	HWMON_C_REGISTER_TZ,
+	0
+};
+
+static const struct hwmon_channel_info ds3232_hwmon_chip = {
+	.type = hwmon_chip,
+	.config = ds3232_hwmon_chip_config,
+};
+
+static u32 ds3232_hwmon_temp_config[] = {
+	HWMON_T_INPUT,
+	0
+};
+
+static const struct hwmon_channel_info ds3232_hwmon_temp = {
+	.type = hwmon_temp,
+	.config = ds3232_hwmon_temp_config,
+};
+
+static const struct hwmon_channel_info *ds3232_hwmon_info[] = {
+	&ds3232_hwmon_chip,
+	&ds3232_hwmon_temp,
+	NULL
+};
+
+static const struct hwmon_ops ds3232_hwmon_hwmon_ops = {
+	.is_visible = ds3232_hwmon_is_visible,
+	.read = ds3232_hwmon_read,
+};
+
+static const struct hwmon_chip_info ds3232_hwmon_chip_info = {
+	.ops = &ds3232_hwmon_hwmon_ops,
+	.info = ds3232_hwmon_info,
+};
+
+static void ds3232_hwmon_register(struct device *dev, const char *name)
+{
+	struct ds3232 *ds3232 = dev_get_drvdata(dev);
+	struct device *hwmon_dev;
+
+	if (!IS_ENABLED(CONFIG_RTC_DRV_DS3232_HWMON))
+		return;
+
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, name, ds3232,
+							&ds3232_hwmon_chip_info,
+							NULL);
+	if (IS_ERR(hwmon_dev)) {
+		dev_err(dev, "unable to register hwmon device %ld\n",
+			PTR_ERR(hwmon_dev));
+	}
+}
+
 static int ds3232_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
 	struct ds3232 *ds3232 = dev_get_drvdata(dev);
@@ -366,6 +483,8 @@ static int ds3232_probe(struct device *dev, struct regmap *regmap, int irq,
 	if (ds3232->irq > 0)
 		device_init_wakeup(dev, 1);
 
+	ds3232_hwmon_register(dev, name);
+
 	ds3232->rtc = devm_rtc_device_register(dev, name, &ds3232_rtc_ops,
 						THIS_MODULE);
 	if (IS_ERR(ds3232->rtc))

diff --git a/drivers/rtc/rtc-ftrtc010.c b/drivers/rtc/rtc-ftrtc010.c
new file mode 100644
index 0000000..af8d6be
--- /dev/null
+++ b/drivers/rtc/rtc-ftrtc010.c

@@ -0,0 +1,208 @@
+/*
+ *  Faraday Technology FTRTC010 driver
+ *
+ *  Copyright (C) 2009 Janos Laube <janos.dev@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Original code for older kernel 2.6.15 are from Stormlinksemi
+ * first update from Janos Laube for > 2.6.29 kernels
+ *
+ * checkpatch fixes and usage of rtc-lib code
+ * Hans Ulli Kroll <ulli.kroll@googlemail.com>
+ */
+
+#include <linux/rtc.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/clk.h>
+
+#define DRV_NAME        "rtc-ftrtc010"
+
+MODULE_AUTHOR("Hans Ulli Kroll <ulli.kroll@googlemail.com>");
+MODULE_DESCRIPTION("RTC driver for Gemini SoC");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" DRV_NAME);
+
+struct ftrtc010_rtc {
+	struct rtc_device	*rtc_dev;
+	void __iomem		*rtc_base;
+	int			rtc_irq;
+	struct clk		*pclk;
+	struct clk		*extclk;
+};
+
+enum ftrtc010_rtc_offsets {
+	FTRTC010_RTC_SECOND		= 0x00,
+	FTRTC010_RTC_MINUTE		= 0x04,
+	FTRTC010_RTC_HOUR		= 0x08,
+	FTRTC010_RTC_DAYS		= 0x0C,
+	FTRTC010_RTC_ALARM_SECOND	= 0x10,
+	FTRTC010_RTC_ALARM_MINUTE	= 0x14,
+	FTRTC010_RTC_ALARM_HOUR		= 0x18,
+	FTRTC010_RTC_RECORD		= 0x1C,
+	FTRTC010_RTC_CR			= 0x20,
+};
+
+static irqreturn_t ftrtc010_rtc_interrupt(int irq, void *dev)
+{
+	return IRQ_HANDLED;
+}
+
+/*
+ * Looks like the RTC in the Gemini SoC is (totally) broken
+ * We can't read/write directly the time from RTC registers.
+ * We must do some "offset" calculation to get the real time
+ *
+ * This FIX works pretty fine and Stormlinksemi aka Cortina-Networks does
+ * the same thing, without the rtc-lib.c calls.
+ */
+
+static int ftrtc010_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct ftrtc010_rtc *rtc = dev_get_drvdata(dev);
+	/* The time is the counter fields plus the offset kept in RECORD. */
+	unsigned int  days, hour, min, sec;
+	unsigned long offset, time;
+
+	sec  = readl(rtc->rtc_base + FTRTC010_RTC_SECOND);
+	min  = readl(rtc->rtc_base + FTRTC010_RTC_MINUTE);
+	hour = readl(rtc->rtc_base + FTRTC010_RTC_HOUR);
+	days = readl(rtc->rtc_base + FTRTC010_RTC_DAYS);
+	offset = readl(rtc->rtc_base + FTRTC010_RTC_RECORD);
+	/* NOTE(review): the fields are read separately, without a retry. */
+	time = offset + days * 86400 + hour * 3600 + min * 60 + sec;
+
+	rtc_time_to_tm(time, tm);
+
+	return 0;
+}
+
+static int ftrtc010_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct ftrtc010_rtc *rtc = dev_get_drvdata(dev);
+	unsigned int sec, min, hour, day;
+	unsigned long offset, time;
+
+	/* tm_year counts from 1900, so convert before the calendar check. */
+	if (tm->tm_year + 1900 >= 2148)	/* EPOCH Year + 179 */
+		return -EINVAL;
+	rtc_tm_to_time(tm, &time);
+
+	/* The counter fields are only read; the new time goes into RECORD. */
+	sec = readl(rtc->rtc_base + FTRTC010_RTC_SECOND);
+	min = readl(rtc->rtc_base + FTRTC010_RTC_MINUTE);
+	hour = readl(rtc->rtc_base + FTRTC010_RTC_HOUR);
+	day = readl(rtc->rtc_base + FTRTC010_RTC_DAYS);
+
+	/* Offset such that offset + counter equals the requested time. */
+	offset = time - (day * 86400 + hour * 3600 + min * 60 + sec);
+	writel(offset, rtc->rtc_base + FTRTC010_RTC_RECORD);
+	writel(0x01, rtc->rtc_base + FTRTC010_RTC_CR);
+	return 0;
+}
+
+static const struct rtc_class_ops ftrtc010_rtc_ops = {
+	.read_time     = ftrtc010_rtc_read_time,
+	.set_time      = ftrtc010_rtc_set_time,
+};
+
+static int ftrtc010_rtc_probe(struct platform_device *pdev)
+{
+	struct ftrtc010_rtc *rtc;
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	int ret;
+
+	rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
+	if (unlikely(!rtc))
+		return -ENOMEM;
+	platform_set_drvdata(pdev, rtc);
+	/* A clock that cannot be looked up is only logged; probe carries on. */
+	rtc->pclk = devm_clk_get(dev, "PCLK");
+	if (IS_ERR(rtc->pclk)) {
+		dev_err(dev, "could not get PCLK\n");
+	} else {
+		ret = clk_prepare_enable(rtc->pclk);
+		if (ret) {
+			dev_err(dev, "failed to enable PCLK\n");
+			return ret;
+		}
+	}
+	rtc->extclk = devm_clk_get(dev, "EXTCLK");
+	if (IS_ERR(rtc->extclk)) {
+		dev_err(dev, "could not get EXTCLK\n");
+	} else {
+		ret = clk_prepare_enable(rtc->extclk);
+		if (ret) {
+			dev_err(dev, "failed to enable EXTCLK\n");
+			return ret;
+		}
+	}
+	/* NOTE(review): no error return disables a clock that was enabled. */
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res)
+		return -ENODEV;
+
+	rtc->rtc_irq = res->start;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	rtc->rtc_base = devm_ioremap(dev, res->start,
+				     resource_size(res));
+	if (!rtc->rtc_base)
+		return -ENOMEM;
+
+	ret = devm_request_irq(dev, rtc->rtc_irq, ftrtc010_rtc_interrupt,
+			       IRQF_SHARED, pdev->name, dev);
+	if (unlikely(ret))
+		return ret;
+
+	rtc->rtc_dev = rtc_device_register(pdev->name, dev,
+					   &ftrtc010_rtc_ops, THIS_MODULE);
+	return PTR_ERR_OR_ZERO(rtc->rtc_dev);
+}
+
+static int ftrtc010_rtc_remove(struct platform_device *pdev)
+{
+	struct ftrtc010_rtc *rtc = platform_get_drvdata(pdev);
+
+	/* Undo probe in reverse: drop the RTC device while clocks still run. */
+	rtc_device_unregister(rtc->rtc_dev);
+	if (!IS_ERR(rtc->extclk))
+		clk_disable_unprepare(rtc->extclk);
+	if (!IS_ERR(rtc->pclk))
+		clk_disable_unprepare(rtc->pclk);
+	return 0;
+}
+
+static const struct of_device_id ftrtc010_rtc_dt_match[] = {
+	{ .compatible = "cortina,gemini-rtc" },
+	{ .compatible = "faraday,ftrtc010" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ftrtc010_rtc_dt_match);
+
+static struct platform_driver ftrtc010_rtc_driver = {
+	.driver		= {
+		.name	= DRV_NAME,
+		.of_match_table = ftrtc010_rtc_dt_match,
+	},
+	.probe		= ftrtc010_rtc_probe,
+	.remove		= ftrtc010_rtc_remove,
+};
+
+module_platform_driver_probe(ftrtc010_rtc_driver, ftrtc010_rtc_probe);

diff --git a/drivers/rtc/rtc-gemini.c b/drivers/rtc/rtc-gemini.c
deleted file mode 100644
index 5279390..0000000
--- a/drivers/rtc/rtc-gemini.c
+++ /dev/null

@@ -1,179 +0,0 @@
-/*
- *  Gemini OnChip RTC
- *
- *  Copyright (C) 2009 Janos Laube <janos.dev@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * Original code for older kernel 2.6.15 are from Stormlinksemi
- * first update from Janos Laube for > 2.6.29 kernels
- *
- * checkpatch fixes and usage of rtc-lib code
- * Hans Ulli Kroll <ulli.kroll@googlemail.com>
- */
-
-#include <linux/rtc.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/platform_device.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#define DRV_NAME        "rtc-gemini"
-
-MODULE_AUTHOR("Hans Ulli Kroll <ulli.kroll@googlemail.com>");
-MODULE_DESCRIPTION("RTC driver for Gemini SoC");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:" DRV_NAME);
-
-struct gemini_rtc {
-	struct rtc_device	*rtc_dev;
-	void __iomem		*rtc_base;
-	int			rtc_irq;
-};
-
-enum gemini_rtc_offsets {
-	GEMINI_RTC_SECOND	= 0x00,
-	GEMINI_RTC_MINUTE	= 0x04,
-	GEMINI_RTC_HOUR		= 0x08,
-	GEMINI_RTC_DAYS		= 0x0C,
-	GEMINI_RTC_ALARM_SECOND	= 0x10,
-	GEMINI_RTC_ALARM_MINUTE	= 0x14,
-	GEMINI_RTC_ALARM_HOUR	= 0x18,
-	GEMINI_RTC_RECORD	= 0x1C,
-	GEMINI_RTC_CR		= 0x20
-};
-
-static irqreturn_t gemini_rtc_interrupt(int irq, void *dev)
-{
-	return IRQ_HANDLED;
-}
-
-/*
- * Looks like the RTC in the Gemini SoC is (totaly) broken
- * We can't read/write directly the time from RTC registers.
- * We must do some "offset" calculation to get the real time
- *
- * This FIX works pretty fine and Stormlinksemi aka Cortina-Networks does
- * the same thing, without the rtc-lib.c calls.
- */
-
-static int gemini_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	struct gemini_rtc *rtc = dev_get_drvdata(dev);
-
-	unsigned int  days, hour, min, sec;
-	unsigned long offset, time;
-
-	sec  = readl(rtc->rtc_base + GEMINI_RTC_SECOND);
-	min  = readl(rtc->rtc_base + GEMINI_RTC_MINUTE);
-	hour = readl(rtc->rtc_base + GEMINI_RTC_HOUR);
-	days = readl(rtc->rtc_base + GEMINI_RTC_DAYS);
-	offset = readl(rtc->rtc_base + GEMINI_RTC_RECORD);
-
-	time = offset + days * 86400 + hour * 3600 + min * 60 + sec;
-
-	rtc_time_to_tm(time, tm);
-
-	return 0;
-}
-
-static int gemini_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	struct gemini_rtc *rtc = dev_get_drvdata(dev);
-	unsigned int sec, min, hour, day;
-	unsigned long offset, time;
-
-	if (tm->tm_year >= 2148)	/* EPOCH Year + 179 */
-		return -EINVAL;
-
-	rtc_tm_to_time(tm, &time);
-
-	sec = readl(rtc->rtc_base + GEMINI_RTC_SECOND);
-	min = readl(rtc->rtc_base + GEMINI_RTC_MINUTE);
-	hour = readl(rtc->rtc_base + GEMINI_RTC_HOUR);
-	day = readl(rtc->rtc_base + GEMINI_RTC_DAYS);
-
-	offset = time - (day * 86400 + hour * 3600 + min * 60 + sec);
-
-	writel(offset, rtc->rtc_base + GEMINI_RTC_RECORD);
-	writel(0x01, rtc->rtc_base + GEMINI_RTC_CR);
-
-	return 0;
-}
-
-static const struct rtc_class_ops gemini_rtc_ops = {
-	.read_time     = gemini_rtc_read_time,
-	.set_time      = gemini_rtc_set_time,
-};
-
-static int gemini_rtc_probe(struct platform_device *pdev)
-{
-	struct gemini_rtc *rtc;
-	struct device *dev = &pdev->dev;
-	struct resource *res;
-	int ret;
-
-	rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
-	if (unlikely(!rtc))
-		return -ENOMEM;
-	platform_set_drvdata(pdev, rtc);
-
-	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!res)
-		return -ENODEV;
-
-	rtc->rtc_irq = res->start;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENODEV;
-
-	rtc->rtc_base = devm_ioremap(dev, res->start,
-				     resource_size(res));
-	if (!rtc->rtc_base)
-		return -ENOMEM;
-
-	ret = devm_request_irq(dev, rtc->rtc_irq, gemini_rtc_interrupt,
-			       IRQF_SHARED, pdev->name, dev);
-	if (unlikely(ret))
-		return ret;
-
-	rtc->rtc_dev = rtc_device_register(pdev->name, dev,
-					   &gemini_rtc_ops, THIS_MODULE);
-	return PTR_ERR_OR_ZERO(rtc->rtc_dev);
-}
-
-static int gemini_rtc_remove(struct platform_device *pdev)
-{
-	struct gemini_rtc *rtc = platform_get_drvdata(pdev);
-
-	rtc_device_unregister(rtc->rtc_dev);
-
-	return 0;
-}
-
-static const struct of_device_id gemini_rtc_dt_match[] = {
-	{ .compatible = "cortina,gemini-rtc" },
-	{ }
-};
-MODULE_DEVICE_TABLE(of, gemini_rtc_dt_match);
-
-static struct platform_driver gemini_rtc_driver = {
-	.driver		= {
-		.name	= DRV_NAME,
-		.of_match_table = gemini_rtc_dt_match,
-	},
-	.probe		= gemini_rtc_probe,
-	.remove		= gemini_rtc_remove,
-};
-
-module_platform_driver_probe(gemini_rtc_driver, gemini_rtc_probe);

diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index 5ec4653..8940e9e 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c

@@ -16,6 +16,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/bcd.h>
+#include <linux/clk-provider.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -53,6 +54,8 @@
 #define M41T80_ALARM_REG_SIZE	\
 	(M41T80_REG_ALARM_SEC + 1 - M41T80_REG_ALARM_MON)
 
+#define M41T80_SQW_MAX_FREQ	32768
+
 #define M41T80_SEC_ST		BIT(7)	/* ST: Stop Bit */
 #define M41T80_ALMON_AFE	BIT(7)	/* AFE: AF Enable Bit */
 #define M41T80_ALMON_SQWE	BIT(6)	/* SQWE: SQW Enable Bit */
@@ -147,7 +150,11 @@ MODULE_DEVICE_TABLE(of, m41t80_of_match);
 
 struct m41t80_data {
 	unsigned long features;
+	struct i2c_client *client;
 	struct rtc_device *rtc;
+#ifdef CONFIG_COMMON_CLK
+	struct clk_hw sqw;
+#endif
 };
 
 static irqreturn_t m41t80_handle_irq(int irq, void *dev_id)
@@ -227,6 +234,7 @@ static int m41t80_get_datetime(struct i2c_client *client,
 /* Sets the given date and time to the real time clock. */
 static int m41t80_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 {
+	struct m41t80_data *clientdata = i2c_get_clientdata(client);
 	unsigned char buf[8];
 	int err, flags;
 
@@ -242,6 +250,17 @@ static int m41t80_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 	buf[M41T80_REG_YEAR] = bin2bcd(tm->tm_year - 100);
 	buf[M41T80_REG_WDAY] = tm->tm_wday;
 
+	/* If the square wave output is controlled in the weekday register */
+	if (clientdata->features & M41T80_FEATURE_SQ_ALT) {
+		int val;
+
+		val = i2c_smbus_read_byte_data(client, M41T80_REG_WDAY);
+		if (val < 0)
+			return val;
+
+		buf[M41T80_REG_WDAY] |= (val & 0xf0);
+	}
+
 	err = i2c_smbus_write_i2c_block_data(client, M41T80_REG_SSEC,
 					     sizeof(buf), buf);
 	if (err < 0) {
@@ -332,6 +351,9 @@ static int m41t80_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 		return err;
 	}
 
+	/* Keep SQWE bit value */
+	alarmvals[0] |= (ret & M41T80_ALMON_SQWE);
+
 	ret = i2c_smbus_read_byte_data(client, M41T80_REG_FLAGS);
 	if (ret < 0)
 		return ret;
@@ -431,96 +453,8 @@ static ssize_t flags_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(flags);
 
-static ssize_t sqwfreq_show(struct device *dev,
-			    struct device_attribute *attr, char *buf)
-{
-	struct i2c_client *client = to_i2c_client(dev);
-	struct m41t80_data *clientdata = i2c_get_clientdata(client);
-	int val, reg_sqw;
-
-	if (!(clientdata->features & M41T80_FEATURE_SQ))
-		return -EINVAL;
-
-	reg_sqw = M41T80_REG_SQW;
-	if (clientdata->features & M41T80_FEATURE_SQ_ALT)
-		reg_sqw = M41T80_REG_WDAY;
-	val = i2c_smbus_read_byte_data(client, reg_sqw);
-	if (val < 0)
-		return val;
-	val = (val >> 4) & 0xf;
-	switch (val) {
-	case 0:
-		break;
-	case 1:
-		val = 32768;
-		break;
-	default:
-		val = 32768 >> val;
-	}
-	return sprintf(buf, "%d\n", val);
-}
-
-static ssize_t sqwfreq_store(struct device *dev,
-			     struct device_attribute *attr,
-			     const char *buf, size_t count)
-{
-	struct i2c_client *client = to_i2c_client(dev);
-	struct m41t80_data *clientdata = i2c_get_clientdata(client);
-	int almon, sqw, reg_sqw, rc;
-	unsigned long val;
-
-	rc = kstrtoul(buf, 0, &val);
-	if (rc < 0)
-		return rc;
-
-	if (!(clientdata->features & M41T80_FEATURE_SQ))
-		return -EINVAL;
-
-	if (val) {
-		if (!is_power_of_2(val))
-			return -EINVAL;
-		val = ilog2(val);
-		if (val == 15)
-			val = 1;
-		else if (val < 14)
-			val = 15 - val;
-		else
-			return -EINVAL;
-	}
-	/* disable SQW, set SQW frequency & re-enable */
-	almon = i2c_smbus_read_byte_data(client, M41T80_REG_ALARM_MON);
-	if (almon < 0)
-		return almon;
-	reg_sqw = M41T80_REG_SQW;
-	if (clientdata->features & M41T80_FEATURE_SQ_ALT)
-		reg_sqw = M41T80_REG_WDAY;
-	sqw = i2c_smbus_read_byte_data(client, reg_sqw);
-	if (sqw < 0)
-		return sqw;
-	sqw = (sqw & 0x0f) | (val << 4);
-
-	rc = i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON,
-				       almon & ~M41T80_ALMON_SQWE);
-	if (rc < 0)
-		return rc;
-
-	if (val) {
-		rc = i2c_smbus_write_byte_data(client, reg_sqw, sqw);
-		if (rc < 0)
-			return rc;
-
-		rc = i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON,
-					       almon | M41T80_ALMON_SQWE);
-		if (rc < 0)
-			return rc;
-	}
-	return count;
-}
-static DEVICE_ATTR_RW(sqwfreq);
-
 static struct attribute *attrs[] = {
 	&dev_attr_flags.attr,
-	&dev_attr_sqwfreq.attr,
 	NULL,
 };
 
@@ -528,6 +462,166 @@ static struct attribute_group attr_group = {
 	.attrs = attrs,
 };
 
+#ifdef CONFIG_COMMON_CLK
+#define sqw_to_m41t80_data(_hw) container_of(_hw, struct m41t80_data, sqw)
+
+static unsigned long m41t80_sqw_recalc_rate(struct clk_hw *hw,
+					    unsigned long parent_rate)
+{
+	struct m41t80_data *m41t80 = sqw_to_m41t80_data(hw);
+	struct i2c_client *client = m41t80->client;
+	int reg_sqw = (m41t80->features & M41T80_FEATURE_SQ_ALT) ?
+		M41T80_REG_WDAY : M41T80_REG_SQW;
+	int ret = i2c_smbus_read_byte_data(client, reg_sqw);
+	unsigned long val = M41T80_SQW_MAX_FREQ;
+
+	if (ret < 0)
+		return 0;	/* a failed read is reported as rate 0 */
+	/* Upper nibble: 0 gives rate 0, 1 the max rate, n > 1 max / 2^n. */
+	ret >>= 4;
+	if (ret == 0)
+		val = 0;
+	else if (ret > 1)
+		val = val / (1 << ret);
+
+	return val;
+}
+
+static long m41t80_sqw_round_rate(struct clk_hw *hw, unsigned long rate,
+				  unsigned long *prate)
+{
+	int i, freq = M41T80_SQW_MAX_FREQ;
+
+	if (freq <= rate)
+		return freq;
+
+	/* Best max / 2^i (i >= 2) not above rate; divide the max each time. */
+	for (i = 2; i <= ilog2(M41T80_SQW_MAX_FREQ); i++) {
+		freq = M41T80_SQW_MAX_FREQ >> i;
+		if (freq <= rate)
+			return freq;
+	}
+	return 0;
+}
+
+static int m41t80_sqw_set_rate(struct clk_hw *hw, unsigned long rate,
+			       unsigned long parent_rate)
+{
+	struct m41t80_data *m41t80 = sqw_to_m41t80_data(hw);
+	struct i2c_client *client = m41t80->client;
+	int reg_sqw = (m41t80->features & M41T80_FEATURE_SQ_ALT) ?
+		M41T80_REG_WDAY : M41T80_REG_SQW;
+	int reg, val = 0;
+
+	/*
+	 * Turn the rate into the 4-bit code: max -> 1, max / 2^n -> n
+	 * (n >= 2). A rate of 0 keeps the code at 0.
+	 */
+	if (rate) {
+		if (!is_power_of_2(rate))
+			return -EINVAL;
+		val = ilog2(rate);
+		if (val == ilog2(M41T80_SQW_MAX_FREQ))
+			val = 1;
+		else if (val < (ilog2(M41T80_SQW_MAX_FREQ) - 1))
+			val = ilog2(M41T80_SQW_MAX_FREQ) - val;
+		else
+			return -EINVAL;
+	}
+
+	reg = i2c_smbus_read_byte_data(client, reg_sqw);
+	if (reg < 0)
+		return reg;
+	/* The code lives in the upper nibble; keep the lower nibble as is. */
+	reg = (reg & 0x0f) | (val << 4);
+	/* Report the write status: 0 on success, negative errno on error. */
+	return i2c_smbus_write_byte_data(client, reg_sqw, reg);
+}
+
+static int m41t80_sqw_control(struct clk_hw *hw, bool enable)
+{
+	struct m41t80_data *m41t80 = sqw_to_m41t80_data(hw);
+	struct i2c_client *client = m41t80->client;
+	int ret = i2c_smbus_read_byte_data(client, M41T80_REG_ALARM_MON);
+
+	if (ret < 0)
+		return ret;
+	/* Flip only SQWE; the other bits read above are written back. */
+	if (enable)
+		ret |= M41T80_ALMON_SQWE;
+	else
+		ret &= ~M41T80_ALMON_SQWE;
+
+	return i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON, ret);
+}
+
+static int m41t80_sqw_prepare(struct clk_hw *hw)
+{
+	return m41t80_sqw_control(hw, 1);
+}
+
+static void m41t80_sqw_unprepare(struct clk_hw *hw)
+{
+	m41t80_sqw_control(hw, 0);
+}
+
+static int m41t80_sqw_is_prepared(struct clk_hw *hw)
+{
+	struct m41t80_data *m41t80 = sqw_to_m41t80_data(hw);
+	struct i2c_client *client = m41t80->client;
+	int ret = i2c_smbus_read_byte_data(client, M41T80_REG_ALARM_MON);
+
+	if (ret < 0)
+		return ret;
+
+	return !!(ret & M41T80_ALMON_SQWE);
+}
+
+static const struct clk_ops m41t80_sqw_ops = {
+	.prepare = m41t80_sqw_prepare,
+	.unprepare = m41t80_sqw_unprepare,
+	.is_prepared = m41t80_sqw_is_prepared,
+	.recalc_rate = m41t80_sqw_recalc_rate,
+	.round_rate = m41t80_sqw_round_rate,
+	.set_rate = m41t80_sqw_set_rate,
+};
+
+static struct clk *m41t80_sqw_register_clk(struct m41t80_data *m41t80)
+{
+	struct i2c_client *client = m41t80->client;
+	struct device_node *node = client->dev.of_node;
+	struct clk *clk;
+	struct clk_init_data init;
+	int ret;
+
+	/* First disable the clock */
+	ret = i2c_smbus_read_byte_data(client, M41T80_REG_ALARM_MON);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	ret = i2c_smbus_write_byte_data(client, M41T80_REG_ALARM_MON,
+					ret & ~(M41T80_ALMON_SQWE));
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	init.name = "m41t80-sqw";
+	init.ops = &m41t80_sqw_ops;
+	init.flags = 0;
+	init.parent_names = NULL;
+	init.num_parents = 0;
+	m41t80->sqw.init = &init;
+
+	/* optional override of the clockname */
+	of_property_read_string(node, "clock-output-names", &init.name);
+
+	/* register the clock */
+	clk = clk_register(&client->dev, &m41t80->sqw);
+	if (!IS_ERR(clk))
+		of_clk_add_provider(node, of_clk_src_simple_get, clk);
+
+	return clk;
+}
+#endif
+
 #ifdef CONFIG_RTC_DRV_M41T80_WDT
 /*
  *****************************************************************************
@@ -845,6 +939,7 @@ static int m41t80_probe(struct i2c_client *client,
 	if (!m41t80_data)
 		return -ENOMEM;
 
+	m41t80_data->client = client;
 	if (client->dev.of_node)
 		m41t80_data->features = (unsigned long)
 			of_device_get_match_data(&client->dev);
@@ -937,6 +1032,10 @@ static int m41t80_probe(struct i2c_client *client,
 		}
 	}
 #endif
+#ifdef CONFIG_COMMON_CLK
+	if (m41t80_data->features & M41T80_FEATURE_SQ)
+		m41t80_sqw_register_clk(m41t80_data);
+#endif
 	return 0;
 }
 

diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c
index 7731912..401f46d 100644
--- a/drivers/rtc/rtc-mxc.c
+++ b/drivers/rtc/rtc-mxc.c

@@ -43,17 +43,6 @@
 
 #define MAX_PIE_NUM     9
 #define MAX_PIE_FREQ    512
-static const u32 PIE_BIT_DEF[MAX_PIE_NUM][2] = {
-	{ 2,		RTC_2HZ_BIT },
-	{ 4,		RTC_SAM0_BIT },
-	{ 8,		RTC_SAM1_BIT },
-	{ 16,		RTC_SAM2_BIT },
-	{ 32,		RTC_SAM3_BIT },
-	{ 64,		RTC_SAM4_BIT },
-	{ 128,		RTC_SAM5_BIT },
-	{ 256,		RTC_SAM6_BIT },
-	{ MAX_PIE_FREQ,	RTC_SAM7_BIT },
-};
 
 #define MXC_RTC_TIME	0
 #define MXC_RTC_ALARM	1

diff --git a/drivers/rtc/rtc-nuc900.c b/drivers/rtc/rtc-nuc900.c
index b1b6b30..4ed8111 100644
--- a/drivers/rtc/rtc-nuc900.c
+++ b/drivers/rtc/rtc-nuc900.c

@@ -93,7 +93,7 @@ static int *check_rtc_access_enable(struct nuc900_rtc *nuc900_rtc)
 	__raw_writel(AERPOWERON, nuc900_rtc->rtc_reg + REG_RTC_AER);
 
 	while (!(__raw_readl(nuc900_rtc->rtc_reg + REG_RTC_AER) & AERRWENB)
-								&& timeout--)
+								&& --timeout)
 		mdelay(1);
 
 	if (!timeout)

diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c
index ea20f62..e2a946c 100644
--- a/drivers/rtc/rtc-opal.c
+++ b/drivers/rtc/rtc-opal.c

@@ -142,6 +142,16 @@ static int opal_get_tpo_time(struct device *dev, struct rtc_wkalrm *alarm)
 
 	y_m_d = be32_to_cpu(__y_m_d);
 	h_m_s_ms = ((u64)be32_to_cpu(__h_m) << 32);
+
+	/* check if no alarm is set */
+	if (y_m_d == 0 && h_m_s_ms == 0) {
+		pr_debug("No alarm is set\n");
+		rc = -ENOENT;
+		goto exit;
+	} else {
+		pr_debug("Alarm set to %x %llx\n", y_m_d, h_m_s_ms);
+	}
+
 	opal_to_tm(y_m_d, h_m_s_ms, &alarm->time);
 
 exit:
@@ -157,7 +167,14 @@ static int opal_set_tpo_time(struct device *dev, struct rtc_wkalrm *alarm)
 	u32 y_m_d = 0;
 	int token, rc;
 
-	tm_to_opal(&alarm->time, &y_m_d, &h_m_s_ms);
+	/* if alarm is enabled */
+	if (alarm->enabled) {
+		tm_to_opal(&alarm->time, &y_m_d, &h_m_s_ms);
+		pr_debug("Alarm set to %x %llx\n", y_m_d, h_m_s_ms);
+
+	} else {
+		pr_debug("Alarm getting disabled\n");
+	}
 
 	token = opal_async_get_token_interruptible();
 	if (token < 0) {
@@ -190,6 +207,18 @@ static int opal_set_tpo_time(struct device *dev, struct rtc_wkalrm *alarm)
 	return rc;
 }
 
+static int opal_tpo_alarm_irq_enable(struct device *dev, unsigned int enabled)
+{
+	struct rtc_wkalrm alarm = { .enabled = 0 };
+
+	/*
+	 * Enabling is a no-op here: TPO is armed when opal_set_tpo_time() is
+	 * called with a non-zero rtc-time. Disabling must be told to OPAL
+	 * explicitly, which is done by passing an alarm with .enabled = 0.
+	 */
+	return enabled ? 0 : opal_set_tpo_time(dev, &alarm);
+}
+
 static struct rtc_class_ops opal_rtc_ops = {
 	.read_time	= opal_get_rtc_time,
 	.set_time	= opal_set_rtc_time,
@@ -205,6 +234,7 @@ static int opal_rtc_probe(struct platform_device *pdev)
 		device_set_wakeup_capable(&pdev->dev, true);
 		opal_rtc_ops.read_alarm	= opal_get_tpo_time;
 		opal_rtc_ops.set_alarm = opal_set_tpo_time;
+		opal_rtc_ops.alarm_irq_enable = opal_tpo_alarm_irq_enable;
 	}
 
 	rtc = devm_rtc_device_register(&pdev->dev, DRVNAME, &opal_rtc_ops,

diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
index 1227cea..cea6ea4 100644
--- a/drivers/rtc/rtc-pcf8563.c
+++ b/drivers/rtc/rtc-pcf8563.c

@@ -606,7 +606,7 @@ static int pcf8563_probe(struct i2c_client *client,
 		err = devm_request_threaded_irq(&client->dev, client->irq,
 				NULL, pcf8563_irq,
 				IRQF_SHARED|IRQF_ONESHOT|IRQF_TRIGGER_FALLING,
-				pcf8563->rtc->name, client);
+				pcf8563_driver.driver.name, client);
 		if (err) {
 			dev_err(&client->dev, "unable to request IRQ %d\n",
 								client->irq);

diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c
index 9ad97ab..aae2576 100644
--- a/drivers/rtc/rtc-rv8803.c
+++ b/drivers/rtc/rtc-rv8803.c

@@ -68,6 +68,7 @@ struct rv8803_data {
 	struct mutex flags_lock;
 	u8 ctrl;
 	enum rv8803_type type;
+	struct nvmem_config nvmem_cfg;
 };
 
 static int rv8803_read_reg(const struct i2c_client *client, u8 reg)
@@ -460,48 +461,32 @@ static int rv8803_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
 	}
 }
 
-static ssize_t rv8803_nvram_write(struct file *filp, struct kobject *kobj,
-				  struct bin_attribute *attr,
-				  char *buf, loff_t off, size_t count)
+static int rv8803_nvram_write(void *priv, unsigned int offset, void *val,
+			      size_t bytes)
 {
-	struct device *dev = kobj_to_dev(kobj);
-	struct i2c_client *client = to_i2c_client(dev);
 	int ret;
 
-	ret = rv8803_write_reg(client, RV8803_RAM, buf[0]);
+	ret = rv8803_write_reg(priv, RV8803_RAM, *(u8 *)val);
 	if (ret)
 		return ret;
 
-	return 1;
+	return 0;
 }
 
-static ssize_t rv8803_nvram_read(struct file *filp, struct kobject *kobj,
-				 struct bin_attribute *attr,
-				 char *buf, loff_t off, size_t count)
+static int rv8803_nvram_read(void *priv, unsigned int offset,
+			     void *val, size_t bytes)
 {
-	struct device *dev = kobj_to_dev(kobj);
-	struct i2c_client *client = to_i2c_client(dev);
 	int ret;
 
-	ret = rv8803_read_reg(client, RV8803_RAM);
+	ret = rv8803_read_reg(priv, RV8803_RAM);
 	if (ret < 0)
 		return ret;
 
-	buf[0] = ret;
+	*(u8 *)val = ret;
 
-	return 1;
+	return 0;
 }
 
-static struct bin_attribute rv8803_nvram_attr = {
-	.attr = {
-		.name = "nvram",
-		.mode = S_IRUGO | S_IWUSR,
-	},
-	.size = 1,
-	.read = rv8803_nvram_read,
-	.write = rv8803_nvram_write,
-};
-
 static struct rtc_class_ops rv8803_rtc_ops = {
 	.read_time = rv8803_get_time,
 	.set_time = rv8803_set_time,
@@ -577,6 +562,11 @@ static int rv8803_probe(struct i2c_client *client,
 	if (flags & RV8803_FLAG_AF)
 		dev_warn(&client->dev, "An alarm maybe have been missed.\n");
 
+	rv8803->rtc = devm_rtc_allocate_device(&client->dev);
+	if (IS_ERR(rv8803->rtc)) {
+		return PTR_ERR(rv8803->rtc);
+	}
+
 	if (client->irq > 0) {
 		err = devm_request_threaded_irq(&client->dev, client->irq,
 						NULL, rv8803_handle_irq,
@@ -592,12 +582,20 @@ static int rv8803_probe(struct i2c_client *client,
 		}
 	}
 
-	rv8803->rtc = devm_rtc_device_register(&client->dev, client->name,
-					       &rv8803_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rv8803->rtc)) {
-		dev_err(&client->dev, "unable to register the class device\n");
-		return PTR_ERR(rv8803->rtc);
-	}
+	rv8803->nvmem_cfg.name = "rv8803_nvram";
+	rv8803->nvmem_cfg.word_size = 1;
+	rv8803->nvmem_cfg.stride = 1;
+	rv8803->nvmem_cfg.size = 1;
+	rv8803->nvmem_cfg.reg_read = rv8803_nvram_read;
+	rv8803->nvmem_cfg.reg_write = rv8803_nvram_write;
+	rv8803->nvmem_cfg.priv = client;
+
+	rv8803->rtc->ops = &rv8803_rtc_ops;
+	rv8803->rtc->nvmem_config = &rv8803->nvmem_cfg;
+	rv8803->rtc->nvram_old_abi = true;
+	err = rtc_register_device(rv8803->rtc);
+	if (err)
+		return err;
 
 	err = rv8803_write_reg(rv8803->client, RV8803_EXT, RV8803_EXT_WADA);
 	if (err)
@@ -609,22 +607,11 @@ static int rv8803_probe(struct i2c_client *client,
 		return err;
 	}
 
-	err = device_create_bin_file(&client->dev, &rv8803_nvram_attr);
-	if (err)
-		return err;
-
 	rv8803->rtc->max_user_freq = 1;
 
 	return 0;
 }
 
-static int rv8803_remove(struct i2c_client *client)
-{
-	device_remove_bin_file(&client->dev, &rv8803_nvram_attr);
-
-	return 0;
-}
-
 static const struct i2c_device_id rv8803_id[] = {
 	{ "rv8803", rv_8803 },
 	{ "rx8900", rx_8900 },
@@ -651,7 +638,6 @@ static struct i2c_driver rv8803_driver = {
 		.of_match_table = of_match_ptr(rv8803_of_match),
 	},
 	.probe		= rv8803_probe,
-	.remove		= rv8803_remove,
 	.id_table	= rv8803_id,
 };
 module_i2c_driver(rv8803_driver);

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index d44fb34..a8992c2 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c

@@ -41,7 +41,7 @@ struct s3c_rtc {
 	struct clk *rtc_src_clk;
 	bool clk_disabled;
 
-	struct s3c_rtc_data *data;
+	const struct s3c_rtc_data *data;
 
 	int irq_alarm;
 	int irq_tick;
@@ -49,7 +49,8 @@ struct s3c_rtc {
 	spinlock_t pie_lock;
 	spinlock_t alarm_clk_lock;
 
-	int ticnt_save, ticnt_en_save;
+	int ticnt_save;
+	int ticnt_en_save;
 	bool wake_en;
 };
 
@@ -67,18 +68,32 @@ struct s3c_rtc_data {
 	void (*disable) (struct s3c_rtc *info);
 };
 
-static void s3c_rtc_enable_clk(struct s3c_rtc *info)
+static int s3c_rtc_enable_clk(struct s3c_rtc *info)
 {
 	unsigned long irq_flags;
+	int ret = 0;
 
 	spin_lock_irqsave(&info->alarm_clk_lock, irq_flags);
+
 	if (info->clk_disabled) {
-		clk_enable(info->rtc_clk);
-		if (info->data->needs_src_clk)
-			clk_enable(info->rtc_src_clk);
+		ret = clk_enable(info->rtc_clk);
+		if (ret)
+			goto out;
+
+		if (info->data->needs_src_clk) {
+			ret = clk_enable(info->rtc_src_clk);
+			if (ret) {
+				clk_disable(info->rtc_clk);
+				goto out;
+			}
+		}
 		info->clk_disabled = false;
 	}
+
+out:
 	spin_unlock_irqrestore(&info->alarm_clk_lock, irq_flags);
+
+	return ret;
 }
 
 static void s3c_rtc_disable_clk(struct s3c_rtc *info)
@@ -121,10 +136,13 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled)
 {
 	struct s3c_rtc *info = dev_get_drvdata(dev);
 	unsigned int tmp;
+	int ret;
 
 	dev_dbg(info->dev, "%s: aie=%d\n", __func__, enabled);
 
-	s3c_rtc_enable_clk(info);
+	ret = s3c_rtc_enable_clk(info);
+	if (ret)
+		return ret;
 
 	tmp = readb(info->base + S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN;
 
@@ -135,10 +153,13 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled)
 
 	s3c_rtc_disable_clk(info);
 
-	if (enabled)
-		s3c_rtc_enable_clk(info);
-	else
+	if (enabled) {
+		ret = s3c_rtc_enable_clk(info);
+		if (ret)
+			return ret;
+	} else {
 		s3c_rtc_disable_clk(info);
+	}
 
 	return 0;
 }
@@ -146,10 +167,14 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled)
 /* Set RTC frequency */
 static int s3c_rtc_setfreq(struct s3c_rtc *info, int freq)
 {
+	int ret;
+
 	if (!is_power_of_2(freq))
 		return -EINVAL;
 
-	s3c_rtc_enable_clk(info);
+	ret = s3c_rtc_enable_clk(info);
+	if (ret)
+		return ret;
 	spin_lock_irq(&info->pie_lock);
 
 	if (info->data->set_freq)
@@ -166,10 +191,13 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 {
 	struct s3c_rtc *info = dev_get_drvdata(dev);
 	unsigned int have_retried = 0;
+	int ret;
 
-	s3c_rtc_enable_clk(info);
+	ret = s3c_rtc_enable_clk(info);
+	if (ret)
+		return ret;
 
- retry_get_time:
+retry_get_time:
 	rtc_tm->tm_min  = readb(info->base + S3C2410_RTCMIN);
 	rtc_tm->tm_hour = readb(info->base + S3C2410_RTCHOUR);
 	rtc_tm->tm_mday = readb(info->base + S3C2410_RTCDATE);
@@ -199,8 +227,8 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 	rtc_tm->tm_year += 100;
 
 	dev_dbg(dev, "read time %04d.%02d.%02d %02d:%02d:%02d\n",
-		 1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
-		 rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
+		1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
+		rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
 
 	rtc_tm->tm_mon -= 1;
 
@@ -211,10 +239,11 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
 {
 	struct s3c_rtc *info = dev_get_drvdata(dev);
 	int year = tm->tm_year - 100;
+	int ret;
 
 	dev_dbg(dev, "set time %04d.%02d.%02d %02d:%02d:%02d\n",
-		 1900 + tm->tm_year, tm->tm_mon, tm->tm_mday,
-		 tm->tm_hour, tm->tm_min, tm->tm_sec);
+		1900 + tm->tm_year, tm->tm_mon, tm->tm_mday,
+		tm->tm_hour, tm->tm_min, tm->tm_sec);
 
 	/* we get around y2k by simply not supporting it */
 
@@ -223,7 +252,9 @@ static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
 		return -EINVAL;
 	}
 
-	s3c_rtc_enable_clk(info);
+	ret = s3c_rtc_enable_clk(info);
+	if (ret)
+		return ret;
 
 	writeb(bin2bcd(tm->tm_sec),  info->base + S3C2410_RTCSEC);
 	writeb(bin2bcd(tm->tm_min),  info->base + S3C2410_RTCMIN);
@@ -242,8 +273,11 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	struct s3c_rtc *info = dev_get_drvdata(dev);
 	struct rtc_time *alm_tm = &alrm->time;
 	unsigned int alm_en;
+	int ret;
 
-	s3c_rtc_enable_clk(info);
+	ret = s3c_rtc_enable_clk(info);
+	if (ret)
+		return ret;
 
 	alm_tm->tm_sec  = readb(info->base + S3C2410_ALMSEC);
 	alm_tm->tm_min  = readb(info->base + S3C2410_ALMMIN);
@@ -259,9 +293,9 @@ static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	alrm->enabled = (alm_en & S3C2410_RTCALM_ALMEN) ? 1 : 0;
 
 	dev_dbg(dev, "read alarm %d, %04d.%02d.%02d %02d:%02d:%02d\n",
-		 alm_en,
-		 1900 + alm_tm->tm_year, alm_tm->tm_mon, alm_tm->tm_mday,
-		 alm_tm->tm_hour, alm_tm->tm_min, alm_tm->tm_sec);
+		alm_en,
+		1900 + alm_tm->tm_year, alm_tm->tm_mon, alm_tm->tm_mday,
+		alm_tm->tm_hour, alm_tm->tm_min, alm_tm->tm_sec);
 
 	/* decode the alarm enable field */
 	if (alm_en & S3C2410_RTCALM_SECEN)
@@ -292,14 +326,17 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	struct s3c_rtc *info = dev_get_drvdata(dev);
 	struct rtc_time *tm = &alrm->time;
 	unsigned int alrm_en;
+	int ret;
 	int year = tm->tm_year - 100;
 
 	dev_dbg(dev, "s3c_rtc_setalarm: %d, %04d.%02d.%02d %02d:%02d:%02d\n",
-		 alrm->enabled,
-		 1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
-		 tm->tm_hour, tm->tm_min, tm->tm_sec);
+		alrm->enabled,
+		1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
+		tm->tm_hour, tm->tm_min, tm->tm_sec);
 
-	s3c_rtc_enable_clk(info);
+	ret = s3c_rtc_enable_clk(info);
+	if (ret)
+		return ret;
 
 	alrm_en = readb(info->base + S3C2410_RTCALM) & S3C2410_RTCALM_ALMEN;
 	writeb(0x00, info->base + S3C2410_RTCALM);
@@ -348,8 +385,11 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 static int s3c_rtc_proc(struct device *dev, struct seq_file *seq)
 {
 	struct s3c_rtc *info = dev_get_drvdata(dev);
+	int ret;
 
-	s3c_rtc_enable_clk(info);
+	ret = s3c_rtc_enable_clk(info);
+	if (ret)
+		return ret;
 
 	if (info->data->enable_tick)
 		info->data->enable_tick(info, seq);
@@ -378,8 +418,7 @@ static void s3c24xx_rtc_enable(struct s3c_rtc *info)
 		dev_info(info->dev, "rtc disabled, re-enabling\n");
 
 		tmp = readw(info->base + S3C2410_RTCCON);
-		writew(tmp | S3C2410_RTCCON_RTCEN,
-			info->base + S3C2410_RTCCON);
+		writew(tmp | S3C2410_RTCCON_RTCEN, info->base + S3C2410_RTCCON);
 	}
 
 	if (con & S3C2410_RTCCON_CNTSEL) {
@@ -387,7 +426,7 @@ static void s3c24xx_rtc_enable(struct s3c_rtc *info)
 
 		tmp = readw(info->base + S3C2410_RTCCON);
 		writew(tmp & ~S3C2410_RTCCON_CNTSEL,
-			info->base + S3C2410_RTCCON);
+		       info->base + S3C2410_RTCCON);
 	}
 
 	if (con & S3C2410_RTCCON_CLKRST) {
@@ -395,7 +434,7 @@ static void s3c24xx_rtc_enable(struct s3c_rtc *info)
 
 		tmp = readw(info->base + S3C2410_RTCCON);
 		writew(tmp & ~S3C2410_RTCCON_CLKRST,
-			info->base + S3C2410_RTCCON);
+		       info->base + S3C2410_RTCCON);
 	}
 }
 
@@ -437,12 +476,12 @@ static int s3c_rtc_remove(struct platform_device *pdev)
 
 static const struct of_device_id s3c_rtc_dt_match[];
 
-static struct s3c_rtc_data *s3c_rtc_get_data(struct platform_device *pdev)
+static const struct s3c_rtc_data *s3c_rtc_get_data(struct platform_device *pdev)
 {
 	const struct of_device_id *match;
 
 	match = of_match_node(s3c_rtc_dt_match, pdev->dev.of_node);
-	return (struct s3c_rtc_data *)match->data;
+	return match->data;
 }
 
 static int s3c_rtc_probe(struct platform_device *pdev)
@@ -481,7 +520,7 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 	}
 
 	dev_dbg(&pdev->dev, "s3c2410_rtc: tick irq %d, alarm irq %d\n",
-		 info->irq_tick, info->irq_alarm);
+		info->irq_tick, info->irq_alarm);
 
 	/* get the memory region */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -498,7 +537,9 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 			dev_dbg(&pdev->dev, "probe deferred due to missing rtc clk\n");
 		return ret;
 	}
-	clk_prepare_enable(info->rtc_clk);
+	ret = clk_prepare_enable(info->rtc_clk);
+	if (ret)
+		return ret;
 
 	if (info->data->needs_src_clk) {
 		info->rtc_src_clk = devm_clk_get(&pdev->dev, "rtc_src");
@@ -510,10 +551,11 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 			else
 				dev_dbg(&pdev->dev,
 					"probe deferred due to missing rtc src clk\n");
-			clk_disable_unprepare(info->rtc_clk);
-			return ret;
+			goto err_src_clk;
 		}
-		clk_prepare_enable(info->rtc_src_clk);
+		ret = clk_prepare_enable(info->rtc_src_clk);
+		if (ret)
+			goto err_src_clk;
 	}
 
 	/* check to see if everything is setup correctly */
@@ -521,7 +563,7 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 		info->data->enable(info);
 
 	dev_dbg(&pdev->dev, "s3c2410_rtc: RTCCON=%02x\n",
-		 readw(info->base + S3C2410_RTCCON));
+		readw(info->base + S3C2410_RTCCON));
 
 	device_init_wakeup(&pdev->dev, 1);
 
@@ -541,7 +583,7 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 
 	/* register RTC and exit */
 	info->rtc = devm_rtc_device_register(&pdev->dev, "s3c", &s3c_rtcops,
-				  THIS_MODULE);
+					     THIS_MODULE);
 	if (IS_ERR(info->rtc)) {
 		dev_err(&pdev->dev, "cannot attach rtc\n");
 		ret = PTR_ERR(info->rtc);
@@ -549,14 +591,14 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 	}
 
 	ret = devm_request_irq(&pdev->dev, info->irq_alarm, s3c_rtc_alarmirq,
-			  0,  "s3c2410-rtc alarm", info);
+			       0, "s3c2410-rtc alarm", info);
 	if (ret) {
 		dev_err(&pdev->dev, "IRQ%d error %d\n", info->irq_alarm, ret);
 		goto err_nortc;
 	}
 
 	ret = devm_request_irq(&pdev->dev, info->irq_tick, s3c_rtc_tickirq,
-			  0,  "s3c2410-rtc tick", info);
+			       0, "s3c2410-rtc tick", info);
 	if (ret) {
 		dev_err(&pdev->dev, "IRQ%d error %d\n", info->irq_tick, ret);
 		goto err_nortc;
@@ -569,12 +611,13 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 
 	return 0;
 
- err_nortc:
+err_nortc:
 	if (info->data->disable)
 		info->data->disable(info);
 
 	if (info->data->needs_src_clk)
 		clk_disable_unprepare(info->rtc_src_clk);
+err_src_clk:
 	clk_disable_unprepare(info->rtc_clk);
 
 	return ret;
@@ -585,8 +628,11 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 static int s3c_rtc_suspend(struct device *dev)
 {
 	struct s3c_rtc *info = dev_get_drvdata(dev);
+	int ret;
 
-	s3c_rtc_enable_clk(info);
+	ret = s3c_rtc_enable_clk(info);
+	if (ret)
+		return ret;
 
 	/* save TICNT for anyone using periodic interrupts */
 	if (info->data->save_tick_cnt)
@@ -747,8 +793,7 @@ static void s3c6410_rtc_restore_tick_cnt(struct s3c_rtc *info)
 	writel(info->ticnt_save, info->base + S3C2410_TICNT);
 	if (info->ticnt_en_save) {
 		con = readw(info->base + S3C2410_RTCCON);
-		writew(con | info->ticnt_en_save,
-				info->base + S3C2410_RTCCON);
+		writew(con | info->ticnt_en_save, info->base + S3C2410_RTCCON);
 	}
 }
 
@@ -802,19 +847,19 @@ static struct s3c_rtc_data const s3c6410_rtc_data = {
 static const struct of_device_id s3c_rtc_dt_match[] = {
 	{
 		.compatible = "samsung,s3c2410-rtc",
-		.data = (void *)&s3c2410_rtc_data,
+		.data = &s3c2410_rtc_data,
 	}, {
 		.compatible = "samsung,s3c2416-rtc",
-		.data = (void *)&s3c2416_rtc_data,
+		.data = &s3c2416_rtc_data,
 	}, {
 		.compatible = "samsung,s3c2443-rtc",
-		.data = (void *)&s3c2443_rtc_data,
+		.data = &s3c2443_rtc_data,
 	}, {
 		.compatible = "samsung,s3c6410-rtc",
-		.data = (void *)&s3c6410_rtc_data,
+		.data = &s3c6410_rtc_data,
 	}, {
 		.compatible = "samsung,exynos3250-rtc",
-		.data = (void *)&s3c6410_rtc_data,
+		.data = &s3c6410_rtc_data,
 	},
 	{ /* sentinel */ },
 };

diff --git a/drivers/rtc/rtc-st-lpc.c b/drivers/rtc/rtc-st-lpc.c
index 74c0a33..82b0af1 100644
--- a/drivers/rtc/rtc-st-lpc.c
+++ b/drivers/rtc/rtc-st-lpc.c

@@ -99,7 +99,7 @@ static int st_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	lpt = ((unsigned long long)lpt_msb << 32) | lpt_lsb;
 	do_div(lpt, rtc->clkrate);
-	rtc_time_to_tm(lpt, tm);
+	rtc_time64_to_tm(lpt, tm);
 
 	return 0;
 }
@@ -107,13 +107,10 @@ static int st_rtc_read_time(struct device *dev, struct rtc_time *tm)
 static int st_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
 	struct st_rtc *rtc = dev_get_drvdata(dev);
-	unsigned long long lpt;
-	unsigned long secs, flags;
-	int ret;
+	unsigned long long lpt, secs;
+	unsigned long flags;
 
-	ret = rtc_tm_to_time(tm, &secs);
-	if (ret)
-		return ret;
+	secs = rtc_tm_to_time64(tm);
 
 	lpt = (unsigned long long)secs * rtc->clkrate;
 
@@ -161,13 +158,13 @@ static int st_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
 	struct st_rtc *rtc = dev_get_drvdata(dev);
 	struct rtc_time now;
-	unsigned long now_secs;
-	unsigned long alarm_secs;
+	unsigned long long now_secs;
+	unsigned long long alarm_secs;
 	unsigned long long lpa;
 
 	st_rtc_read_time(dev, &now);
-	rtc_tm_to_time(&now, &now_secs);
-	rtc_tm_to_time(&t->time, &alarm_secs);
+	now_secs = rtc_tm_to_time64(&now);
+	alarm_secs = rtc_tm_to_time64(&t->time);
 
 	/* Invalid alarm time */
 	if (now_secs > alarm_secs)

diff --git a/drivers/rtc/rtc-stm32.c b/drivers/rtc/rtc-stm32.c
index bd57eb1..3a5c3d7 100644
--- a/drivers/rtc/rtc-stm32.c
+++ b/drivers/rtc/rtc-stm32.c

@@ -94,11 +94,17 @@
 /* STM32_PWR_CR bit field */
 #define PWR_CR_DBP			BIT(8)
 
+struct stm32_rtc_data {
+	bool has_pclk;
+};
+
 struct stm32_rtc {
 	struct rtc_device *rtc_dev;
 	void __iomem *base;
 	struct regmap *dbp;
-	struct clk *ck_rtc;
+	struct stm32_rtc_data *data;
+	struct clk *pclk;
+	struct clk *rtc_ck;
 	int irq_alarm;
 };
 
@@ -122,9 +128,9 @@ static int stm32_rtc_enter_init_mode(struct stm32_rtc *rtc)
 		writel_relaxed(isr, rtc->base + STM32_RTC_ISR);
 
 		/*
-		 * It takes around 2 ck_rtc clock cycles to enter in
+		 * It takes around 2 rtc_ck clock cycles to enter in
 		 * initialization phase mode (and have INITF flag set). As
-		 * slowest ck_rtc frequency may be 32kHz and highest should be
+		 * slowest rtc_ck frequency may be 32kHz and highest should be
 		 * 1MHz, we poll every 10 us with a timeout of 100ms.
 		 */
 		return readl_relaxed_poll_timeout_atomic(
@@ -153,7 +159,7 @@ static int stm32_rtc_wait_sync(struct stm32_rtc *rtc)
 
 	/*
 	 * Wait for RSF to be set to ensure the calendar registers are
-	 * synchronised, it takes around 2 ck_rtc clock cycles
+	 * synchronised, it takes around 2 rtc_ck clock cycles
 	 */
 	return readl_relaxed_poll_timeout_atomic(rtc->base + STM32_RTC_ISR,
 						 isr,
@@ -456,7 +462,7 @@ static int stm32_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 
 	/*
 	 * Poll Alarm write flag to be sure that Alarm update is allowed: it
-	 * takes around 2 ck_rtc clock cycles
+	 * takes around 2 rtc_ck clock cycles
 	 */
 	ret = readl_relaxed_poll_timeout_atomic(rtc->base + STM32_RTC_ISR,
 						isr,
@@ -490,8 +496,17 @@ static const struct rtc_class_ops stm32_rtc_ops = {
 	.alarm_irq_enable = stm32_rtc_alarm_irq_enable,
 };
 
+static const struct stm32_rtc_data stm32_rtc_data = {
+	.has_pclk = false,
+};
+
+static const struct stm32_rtc_data stm32h7_rtc_data = {
+	.has_pclk = true,
+};
+
 static const struct of_device_id stm32_rtc_of_match[] = {
-	{ .compatible = "st,stm32-rtc" },
+	{ .compatible = "st,stm32-rtc", .data = &stm32_rtc_data },
+	{ .compatible = "st,stm32h7-rtc", .data = &stm32h7_rtc_data },
 	{}
 };
 MODULE_DEVICE_TABLE(of, stm32_rtc_of_match);
@@ -503,7 +518,7 @@ static int stm32_rtc_init(struct platform_device *pdev,
 	unsigned int rate;
 	int ret = 0;
 
-	rate = clk_get_rate(rtc->ck_rtc);
+	rate = clk_get_rate(rtc->rtc_ck);
 
 	/* Find prediv_a and prediv_s to obtain the 1Hz calendar clock */
 	pred_a_max = STM32_RTC_PRER_PRED_A >> STM32_RTC_PRER_PRED_A_SHIFT;
@@ -524,7 +539,7 @@ static int stm32_rtc_init(struct platform_device *pdev,
 		pred_a = pred_a_max;
 		pred_s = (rate / (pred_a + 1)) - 1;
 
-		dev_warn(&pdev->dev, "ck_rtc is %s\n",
+		dev_warn(&pdev->dev, "rtc_ck is %s\n",
 			 (rate < ((pred_a + 1) * (pred_s + 1))) ?
 			 "fast" : "slow");
 	}
@@ -561,6 +576,7 @@ static int stm32_rtc_probe(struct platform_device *pdev)
 {
 	struct stm32_rtc *rtc;
 	struct resource *res;
+	const struct of_device_id *match;
 	int ret;
 
 	rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
@@ -579,15 +595,34 @@ static int stm32_rtc_probe(struct platform_device *pdev)
 		return PTR_ERR(rtc->dbp);
 	}
 
-	rtc->ck_rtc = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(rtc->ck_rtc)) {
-		dev_err(&pdev->dev, "no ck_rtc clock");
-		return PTR_ERR(rtc->ck_rtc);
+	match = of_match_device(stm32_rtc_of_match, &pdev->dev);
+	rtc->data = (struct stm32_rtc_data *)match->data;
+
+	if (!rtc->data->has_pclk) {
+		rtc->pclk = NULL;
+		rtc->rtc_ck = devm_clk_get(&pdev->dev, NULL);
+	} else {
+		rtc->pclk = devm_clk_get(&pdev->dev, "pclk");
+		if (IS_ERR(rtc->pclk)) {
+			dev_err(&pdev->dev, "no pclk clock");
+			return PTR_ERR(rtc->pclk);
+		}
+		rtc->rtc_ck = devm_clk_get(&pdev->dev, "rtc_ck");
+	}
+	if (IS_ERR(rtc->rtc_ck)) {
+		dev_err(&pdev->dev, "no rtc_ck clock");
+		return PTR_ERR(rtc->rtc_ck);
 	}
 
-	ret = clk_prepare_enable(rtc->ck_rtc);
+	if (rtc->data->has_pclk) {
+		ret = clk_prepare_enable(rtc->pclk);
+		if (ret)
+			return ret;
+	}
+
+	ret = clk_prepare_enable(rtc->rtc_ck);
 	if (ret)
-		return ret;
+		goto err;
 
 	regmap_update_bits(rtc->dbp, PWR_CR, PWR_CR_DBP, PWR_CR_DBP);
 
@@ -595,7 +630,7 @@ static int stm32_rtc_probe(struct platform_device *pdev)
 	 * After a system reset, RTC_ISR.INITS flag can be read to check if
 	 * the calendar has been initalized or not. INITS flag is reset by a
 	 * power-on reset (no vbat, no power-supply). It is not reset if
-	 * ck_rtc parent clock has changed (so RTC prescalers need to be
+	 * rtc_ck parent clock has changed (so RTC prescalers need to be
 	 * changed). That's why we cannot rely on this flag to know if RTC
 	 * init has to be done.
 	 */
@@ -646,7 +681,9 @@ static int stm32_rtc_probe(struct platform_device *pdev)
 
 	return 0;
 err:
-	clk_disable_unprepare(rtc->ck_rtc);
+	if (rtc->data->has_pclk)
+		clk_disable_unprepare(rtc->pclk);
+	clk_disable_unprepare(rtc->rtc_ck);
 
 	regmap_update_bits(rtc->dbp, PWR_CR, PWR_CR_DBP, 0);
 
@@ -667,7 +704,9 @@ static int stm32_rtc_remove(struct platform_device *pdev)
 	writel_relaxed(cr, rtc->base + STM32_RTC_CR);
 	stm32_rtc_wpr_lock(rtc);
 
-	clk_disable_unprepare(rtc->ck_rtc);
+	clk_disable_unprepare(rtc->rtc_ck);
+	if (rtc->data->has_pclk)
+		clk_disable_unprepare(rtc->pclk);
 
 	/* Enable backup domain write protection */
 	regmap_update_bits(rtc->dbp, PWR_CR, PWR_CR_DBP, 0);
@@ -682,6 +721,9 @@ static int stm32_rtc_suspend(struct device *dev)
 {
 	struct stm32_rtc *rtc = dev_get_drvdata(dev);
 
+	if (rtc->data->has_pclk)
+		clk_disable_unprepare(rtc->pclk);
+
 	if (device_may_wakeup(dev))
 		return enable_irq_wake(rtc->irq_alarm);
 
@@ -693,6 +735,12 @@ static int stm32_rtc_resume(struct device *dev)
 	struct stm32_rtc *rtc = dev_get_drvdata(dev);
 	int ret = 0;
 
+	if (rtc->data->has_pclk) {
+		ret = clk_prepare_enable(rtc->pclk);
+		if (ret)
+			return ret;
+	}
+
 	ret = stm32_rtc_wait_sync(rtc);
 	if (ret < 0)
 		return ret;

diff --git a/drivers/rtc/rtc-sysfs.c b/drivers/rtc/rtc-sysfs.c
index 1218d5d..e364550 100644
--- a/drivers/rtc/rtc-sysfs.c
+++ b/drivers/rtc/rtc-sysfs.c

@@ -27,7 +27,8 @@
 static ssize_t
 name_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	return sprintf(buf, "%s\n", to_rtc_device(dev)->name);
+	return sprintf(buf, "%s %s\n", dev_driver_string(dev->parent),
+		       dev_name(dev->parent));
 }
 static DEVICE_ATTR_RO(name);
 

diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c
index 65f5a79..98749fa 100644
--- a/drivers/s390/char/vmcp.c
+++ b/drivers/s390/char/vmcp.c

@@ -98,7 +98,7 @@ vmcp_write(struct file *file, const char __user *buff, size_t count,
 	}
 	if (!session->response)
 		session->response = (char *)__get_free_pages(GFP_KERNEL
-						| __GFP_REPEAT | GFP_DMA,
+						| __GFP_RETRY_MAYFAIL | GFP_DMA,
 						get_order(session->bufsize));
 	if (!session->response) {
 		mutex_unlock(&session->mutex);

diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c
index 1563b14..2ade613 100644
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c

@@ -1115,7 +1115,7 @@ static const struct net_device_ops ctcm_mpc_netdev_ops = {
 	.ndo_start_xmit		= ctcmpc_tx,
 };
 
-void static ctcm_dev_setup(struct net_device *dev)
+static void ctcm_dev_setup(struct net_device *dev)
 {
 	dev->type = ARPHRD_SLIP;
 	dev->tx_queue_len = 100;

diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 3062cde..8975cd3 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c

@@ -2408,7 +2408,7 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	return rc;
 }
 
-int inline qeth_l3_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
+inline int qeth_l3_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
 {
 	int cast_type = RTN_UNSPEC;
 	struct neighbour *n = NULL;

diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index 659ab48..1f75d03 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c

@@ -155,6 +155,9 @@ static long ibmvscsis_unregister_command_q(struct scsi_info *vscsi)
 		qrc = h_free_crq(vscsi->dds.unit_id);
 		switch (qrc) {
 		case H_SUCCESS:
+			spin_lock_bh(&vscsi->intr_lock);
+			vscsi->flags &= ~PREP_FOR_SUSPEND_FLAGS;
+			spin_unlock_bh(&vscsi->intr_lock);
 			break;
 
 		case H_HARDWARE:
@@ -422,6 +425,9 @@ static void ibmvscsis_disconnect(struct work_struct *work)
 	new_state = vscsi->new_state;
 	vscsi->new_state = 0;
 
+	vscsi->flags |= DISCONNECT_SCHEDULED;
+	vscsi->flags &= ~SCHEDULE_DISCONNECT;
+
 	pr_debug("disconnect: flags 0x%x, state 0x%hx\n", vscsi->flags,
 		 vscsi->state);
 
@@ -802,6 +808,13 @@ static long ibmvscsis_establish_new_q(struct scsi_info *vscsi)
 	long rc = ADAPT_SUCCESS;
 	uint format;
 
+	rc = h_vioctl(vscsi->dds.unit_id, H_ENABLE_PREPARE_FOR_SUSPEND, 30000,
+		      0, 0, 0, 0);
+	if (rc == H_SUCCESS)
+		vscsi->flags |= PREP_FOR_SUSPEND_ENABLED;
+	else if (rc != H_NOT_FOUND)
+		pr_err("Error from Enable Prepare for Suspend: %ld\n", rc);
+
 	vscsi->flags &= PRESERVE_FLAG_FIELDS;
 	vscsi->rsp_q_timer.timer_pops = 0;
 	vscsi->debit = 0;
@@ -951,6 +964,63 @@ static void ibmvscsis_free_cmd_resources(struct scsi_info *vscsi,
 }
 
 /**
+ * ibmvscsis_ready_for_suspend() - Helper function to call VIOCTL
+ * @vscsi:	Pointer to our adapter structure
+ * @idle:	Indicates whether we were called from adapter_idle.  This
+ *		is important to know if we need to do a disconnect, since if
+ *		we're called from adapter_idle, we're still processing the
+ *		current disconnect, so we can't just call post_disconnect.
+ *
+ * This function is called when the adapter is idle when phyp has sent
+ * us a Prepare for Suspend Transport Event.
+ *
+ * EXECUTION ENVIRONMENT:
+ *	Process or interrupt environment called with interrupt lock held
+ */
+static long ibmvscsis_ready_for_suspend(struct scsi_info *vscsi, bool idle)
+{
+	long rc = 0;
+	struct viosrp_crq *crq;
+
+	/* See if there is a Resume event in the queue */
+	crq = vscsi->cmd_q.base_addr + vscsi->cmd_q.index;
+
+	pr_debug("ready_suspend: flags 0x%x, state 0x%hx crq_valid:%x\n",
+		 vscsi->flags, vscsi->state, (int)crq->valid);
+
+	if (!(vscsi->flags & PREP_FOR_SUSPEND_ABORTED) && !(crq->valid)) {
+		rc = h_vioctl(vscsi->dds.unit_id, H_READY_FOR_SUSPEND, 0, 0, 0,
+			      0, 0);
+		if (rc) {
+			pr_err("Ready for Suspend Vioctl failed: %ld\n", rc);
+			rc = 0;
+		}
+	} else if (((vscsi->flags & PREP_FOR_SUSPEND_OVERWRITE) &&
+		    (vscsi->flags & PREP_FOR_SUSPEND_ABORTED)) ||
+		   ((crq->valid) && ((crq->valid != VALID_TRANS_EVENT) ||
+				     (crq->format != RESUME_FROM_SUSP)))) {
+		if (idle) {
+			vscsi->state = ERR_DISCONNECT_RECONNECT;
+			ibmvscsis_reset_queue(vscsi);
+			rc = -1;
+		} else if (vscsi->state == CONNECTED) {
+			ibmvscsis_post_disconnect(vscsi,
+						  ERR_DISCONNECT_RECONNECT, 0);
+		}
+
+		vscsi->flags &= ~PREP_FOR_SUSPEND_OVERWRITE;
+
+		if ((crq->valid) && ((crq->valid != VALID_TRANS_EVENT) ||
+				     (crq->format != RESUME_FROM_SUSP)))
+			pr_err("Invalid element in CRQ after Prepare for Suspend\n");
+	}
+
+	vscsi->flags &= ~(PREP_FOR_SUSPEND_PENDING | PREP_FOR_SUSPEND_ABORTED);
+
+	return rc;
+}
+
+/**
  * ibmvscsis_trans_event() - Handle a Transport Event
  * @vscsi:	Pointer to our adapter structure
  * @crq:	Pointer to CRQ entry containing the Transport Event
@@ -974,18 +1044,8 @@ static long ibmvscsis_trans_event(struct scsi_info *vscsi,
 	case PARTNER_FAILED:
 	case PARTNER_DEREGISTER:
 		ibmvscsis_delete_client_info(vscsi, true);
-		break;
-
-	default:
-		rc = ERROR;
-		dev_err(&vscsi->dev, "trans_event: invalid format %d\n",
-			(uint)crq->format);
-		ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT,
-					  RESPONSE_Q_DOWN);
-		break;
-	}
-
-	if (rc == ADAPT_SUCCESS) {
+		if (crq->format == MIGRATED)
+			vscsi->flags &= ~PREP_FOR_SUSPEND_OVERWRITE;
 		switch (vscsi->state) {
 		case NO_QUEUE:
 		case ERR_DISCONNECTED:
@@ -1034,6 +1094,60 @@ static long ibmvscsis_trans_event(struct scsi_info *vscsi,
 			vscsi->flags |= (RESPONSE_Q_DOWN | TRANS_EVENT);
 			break;
 		}
+		break;
+
+	case PREPARE_FOR_SUSPEND:
+		pr_debug("Prep for Suspend, crq status = 0x%x\n",
+			 (int)crq->status);
+		switch (vscsi->state) {
+		case ERR_DISCONNECTED:
+		case WAIT_CONNECTION:
+		case CONNECTED:
+			ibmvscsis_ready_for_suspend(vscsi, false);
+			break;
+		case SRP_PROCESSING:
+			vscsi->resume_state = vscsi->state;
+			vscsi->flags |= PREP_FOR_SUSPEND_PENDING;
+			if (crq->status == CRQ_ENTRY_OVERWRITTEN)
+				vscsi->flags |= PREP_FOR_SUSPEND_OVERWRITE;
+			ibmvscsis_post_disconnect(vscsi, WAIT_IDLE, 0);
+			break;
+		case NO_QUEUE:
+		case UNDEFINED:
+		case UNCONFIGURING:
+		case WAIT_ENABLED:
+		case ERR_DISCONNECT:
+		case ERR_DISCONNECT_RECONNECT:
+		case WAIT_IDLE:
+			pr_err("Invalid state for Prepare for Suspend Trans Event: 0x%x\n",
+			       vscsi->state);
+			break;
+		}
+		break;
+
+	case RESUME_FROM_SUSP:
+		pr_debug("Resume from Suspend, crq status = 0x%x\n",
+			 (int)crq->status);
+		if (vscsi->flags & PREP_FOR_SUSPEND_PENDING) {
+			vscsi->flags |= PREP_FOR_SUSPEND_ABORTED;
+		} else {
+			if ((crq->status == CRQ_ENTRY_OVERWRITTEN) ||
+			    (vscsi->flags & PREP_FOR_SUSPEND_OVERWRITE)) {
+				ibmvscsis_post_disconnect(vscsi,
+							  ERR_DISCONNECT_RECONNECT,
+							  0);
+				vscsi->flags &= ~PREP_FOR_SUSPEND_OVERWRITE;
+			}
+		}
+		break;
+
+	default:
+		rc = ERROR;
+		dev_err(&vscsi->dev, "trans_event: invalid format %d\n",
+			(uint)crq->format);
+		ibmvscsis_post_disconnect(vscsi, ERR_DISCONNECT,
+					  RESPONSE_Q_DOWN);
+		break;
 	}
 
 	rc = vscsi->flags & SCHEDULE_DISCONNECT;
@@ -1201,6 +1315,7 @@ static struct ibmvscsis_cmd *ibmvscsis_get_free_cmd(struct scsi_info *vscsi)
 static void ibmvscsis_adapter_idle(struct scsi_info *vscsi)
 {
 	int free_qs = false;
+	long rc = 0;
 
 	pr_debug("adapter_idle: flags 0x%x, state 0x%hx\n", vscsi->flags,
 		 vscsi->state);
@@ -1240,7 +1355,14 @@ static void ibmvscsis_adapter_idle(struct scsi_info *vscsi)
 		vscsi->rsp_q_timer.timer_pops = 0;
 		vscsi->debit = 0;
 		vscsi->credit = 0;
-		if (vscsi->flags & TRANS_EVENT) {
+		if (vscsi->flags & PREP_FOR_SUSPEND_PENDING) {
+			vscsi->state = vscsi->resume_state;
+			vscsi->resume_state = 0;
+			rc = ibmvscsis_ready_for_suspend(vscsi, true);
+			vscsi->flags &= ~DISCONNECT_SCHEDULED;
+			if (rc)
+				break;
+		} else if (vscsi->flags & TRANS_EVENT) {
 			vscsi->state = WAIT_CONNECTION;
 			vscsi->flags &= PRESERVE_FLAG_FIELDS;
 		} else {
@@ -3792,8 +3914,16 @@ static struct se_portal_group *ibmvscsis_make_tpg(struct se_wwn *wwn,
 {
 	struct ibmvscsis_tport *tport =
 		container_of(wwn, struct ibmvscsis_tport, tport_wwn);
+	u16 tpgt;
 	int rc;
 
+	if (strstr(name, "tpgt_") != name)
+		return ERR_PTR(-EINVAL);
+	rc = kstrtou16(name + 5, 0, &tpgt);
+	if (rc)
+		return ERR_PTR(rc);
+	tport->tport_tpgt = tpgt;
+
 	tport->releasing = false;
 
 	rc = core_tpg_register(&tport->tport_wwn, &tport->se_tpg,

diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h
index b4391a8..cc96c27 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h

@@ -262,6 +262,14 @@ struct scsi_info {
 #define DISCONNECT_SCHEDULED          0x00800
 	/* remove function is sleeping */
 #define CFG_SLEEPING                  0x01000
+	/* Register for Prepare for Suspend Transport Events */
+#define PREP_FOR_SUSPEND_ENABLED      0x02000
+	/* Prepare for Suspend event sent */
+#define PREP_FOR_SUSPEND_PENDING      0x04000
+	/* Resume from Suspend event sent */
+#define PREP_FOR_SUSPEND_ABORTED      0x08000
+	/* Prepare for Suspend event overwrote another CRQ entry */
+#define PREP_FOR_SUSPEND_OVERWRITE    0x10000
 	u32 flags;
 	/* adapter lock */
 	spinlock_t intr_lock;
@@ -272,6 +280,7 @@ struct scsi_info {
 	/* used in crq, to tag what iu the response is for */
 	u64  empty_iu_tag;
 	uint new_state;
+	uint resume_state;
 	/* control block for the response queue timer */
 	struct timer_cb rsp_q_timer;
 	/* keep last client to enable proper accounting */
@@ -324,8 +333,13 @@ struct scsi_info {
 #define TARGET_STOP(VSCSI) (long)(((VSCSI)->state & DONT_PROCESS_STATE) | \
 				  ((VSCSI)->flags & BLOCK))
 
+#define PREP_FOR_SUSPEND_FLAGS  (PREP_FOR_SUSPEND_ENABLED | \
+				 PREP_FOR_SUSPEND_PENDING | \
+				 PREP_FOR_SUSPEND_ABORTED | \
+				 PREP_FOR_SUSPEND_OVERWRITE)
+
 /* flag bit that are not reset during disconnect */
-#define PRESERVE_FLAG_FIELDS 0
+#define PRESERVE_FLAG_FIELDS (PREP_FOR_SUSPEND_FLAGS)
 
 #define vio_iu(IUE) ((union viosrp_iu *)((IUE)->sbuf->buf))
 
@@ -333,8 +347,15 @@ struct scsi_info {
 #define WRITE_CMD(cdb)	(((cdb)[0] & 0x1F) == 0xA)
 
 #ifndef H_GET_PARTNER_INFO
-#define H_GET_PARTNER_INFO      0x0000000000000008LL
+#define H_GET_PARTNER_INFO              0x0000000000000008LL
 #endif
+#ifndef H_ENABLE_PREPARE_FOR_SUSPEND
+#define H_ENABLE_PREPARE_FOR_SUSPEND    0x000000000000001DLL
+#endif
+#ifndef H_READY_FOR_SUSPEND
+#define H_READY_FOR_SUSPEND             0x000000000000001ELL
+#endif
+
 
 #define h_copy_rdma(l, sa, sb, da, db) \
 		plpar_hcall_norets(H_COPY_RDMA, l, sa, sb, da, db)

diff --git a/drivers/scsi/ibmvscsi_tgt/libsrp.h b/drivers/scsi/ibmvscsi_tgt/libsrp.h
index 4696f33..9fec55b 100644
--- a/drivers/scsi/ibmvscsi_tgt/libsrp.h
+++ b/drivers/scsi/ibmvscsi_tgt/libsrp.h

@@ -30,10 +30,13 @@ enum srp_trans_event {
 	UNUSED_FORMAT = 0,
 	PARTNER_FAILED = 1,
 	PARTNER_DEREGISTER = 2,
-	MIGRATED = 6
+	MIGRATED = 6,
+	PREPARE_FOR_SUSPEND = 9,
+	RESUME_FROM_SUSP = 0xA
 };
 
 enum srp_status {
+	CRQ_ENTRY_OVERWRITTEN = 0x20,
 	HEADER_DESCRIPTOR = 0xF1,
 	PING = 0xF5,
 	PING_RESPONSE = 0xF6

diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index cfe1d01..adc7845 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c

@@ -26,6 +26,7 @@
 #include <linux/export.h>
 #include <linux/delay.h>
 #include <asm/unaligned.h>
+#include <linux/t10-pi.h>
 #include <linux/crc-t10dif.h>
 #include <net/checksum.h>
 
@@ -2934,8 +2935,8 @@ lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
 				 * First check to see if a protection data
 				 * check is valid
 				 */
-				if ((src->ref_tag == 0xffffffff) ||
-				    (src->app_tag == 0xffff)) {
+				if ((src->ref_tag == T10_PI_REF_ESCAPE) ||
+				    (src->app_tag == T10_PI_APP_ESCAPE)) {
 					start_ref_tag++;
 					goto skipit;
 				}

diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 6c6e624..7b3b702 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c

@@ -2040,9 +2040,9 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
 	 * For type     3: ref & app tag is all 'f's
 	 * For type 0,1,2: app tag is all 'f's
 	 */
-	if ((a_app_tag == 0xffff) &&
+	if ((a_app_tag == T10_PI_APP_ESCAPE) &&
 	    ((scsi_get_prot_type(cmd) != SCSI_PROT_DIF_TYPE3) ||
-	     (a_ref_tag == 0xffffffff))) {
+	     (a_ref_tag == T10_PI_REF_ESCAPE))) {
 		uint32_t blocks_done, resid;
 		sector_t lba_s = scsi_get_lba(cmd);
 
@@ -2084,9 +2084,9 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
 			spt = page_address(sg_page(sg)) + sg->offset;
 			spt += j;
 
-			spt->app_tag = 0xffff;
+			spt->app_tag = T10_PI_APP_ESCAPE;
 			if (scsi_get_prot_type(cmd) == SCSI_PROT_DIF_TYPE3)
-				spt->ref_tag = 0xffffffff;
+				spt->ref_tag = T10_PI_REF_ESCAPE;
 		}
 
 		return 0;

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 2a0173e..c2dc836 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c

@@ -1874,36 +1874,13 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha,
 	struct abts_recv_from_24xx *abts, struct fc_port *sess)
 {
 	struct qla_hw_data *ha = vha->hw;
-	struct se_session *se_sess = sess->se_sess;
 	struct qla_tgt_mgmt_cmd *mcmd;
-	struct qla_tgt_cmd *cmd;
-	struct se_cmd *se_cmd;
 	int rc;
-	bool found_lun = false;
-	unsigned long flags;
 
-	spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
-	list_for_each_entry(se_cmd, &se_sess->sess_cmd_list, se_cmd_list) {
-		if (se_cmd->tag == abts->exchange_addr_to_abort) {
-			found_lun = true;
-			break;
-		}
-	}
-	spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
-
-	/* cmd not in LIO lists, look in qla list */
-	if (!found_lun) {
-		if (abort_cmd_for_tag(vha, abts->exchange_addr_to_abort)) {
-			/* send TASK_ABORT response immediately */
-			qlt_24xx_send_abts_resp(ha->base_qpair, abts,
-			    FCP_TMF_CMPL, false);
-			return 0;
-		} else {
-			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf081,
-			    "unable to find cmd in driver or LIO for tag 0x%x\n",
-			    abts->exchange_addr_to_abort);
-			return -ENOENT;
-		}
+	if (abort_cmd_for_tag(vha, abts->exchange_addr_to_abort)) {
+		/* send TASK_ABORT response immediately */
+		qlt_24xx_send_abts_resp(ha->base_qpair, abts, FCP_TMF_CMPL, false);
+		return 0;
 	}
 
 	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf00f,
@@ -1919,14 +1896,17 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha,
 	}
 	memset(mcmd, 0, sizeof(*mcmd));
 
-	cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd);
 	mcmd->sess = sess;
 	memcpy(&mcmd->orig_iocb.abts, abts, sizeof(mcmd->orig_iocb.abts));
 	mcmd->reset_count = ha->base_qpair->chip_reset;
 	mcmd->tmr_func = QLA_TGT_ABTS;
 	mcmd->qpair = ha->base_qpair;
 
-	rc = ha->tgt.tgt_ops->handle_tmr(mcmd, cmd->unpacked_lun, mcmd->tmr_func,
+	/*
+	 * LUN is looked up by target-core internally based on the passed
+	 * abts->exchange_addr_to_abort tag.
+	 */
+	rc = ha->tgt.tgt_ops->handle_tmr(mcmd, 0, mcmd->tmr_func,
 	    abts->exchange_addr_to_abort);
 	if (rc != 0) {
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf052,

diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index c4b4148..b20da0d 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c

@@ -600,11 +600,13 @@ static int tcm_qla2xxx_handle_tmr(struct qla_tgt_mgmt_cmd *mcmd, u64 lun,
 	struct fc_port *sess = mcmd->sess;
 	struct se_cmd *se_cmd = &mcmd->se_cmd;
 	int transl_tmr_func = 0;
+	int flags = TARGET_SCF_ACK_KREF;
 
 	switch (tmr_func) {
 	case QLA_TGT_ABTS:
 		pr_debug("%ld: ABTS received\n", sess->vha->host_no);
 		transl_tmr_func = TMR_ABORT_TASK;
+		flags |= TARGET_SCF_LOOKUP_LUN_FROM_TAG;
 		break;
 	case QLA_TGT_2G_ABORT_TASK:
 		pr_debug("%ld: 2G Abort Task received\n", sess->vha->host_no);
@@ -637,7 +639,7 @@ static int tcm_qla2xxx_handle_tmr(struct qla_tgt_mgmt_cmd *mcmd, u64 lun,
 	}
 
 	return target_submit_tmr(se_cmd, sess->se_sess, NULL, lun, mcmd,
-	    transl_tmr_func, GFP_ATOMIC, tag, TARGET_SCF_ACK_KREF);
+	    transl_tmr_func, GFP_ATOMIC, tag, flags);
 }
 
 static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd)

diff --git a/drivers/staging/greybus/hid.c b/drivers/staging/greybus/hid.c
index 730d746..465101b 100644
--- a/drivers/staging/greybus/hid.c
+++ b/drivers/staging/greybus/hid.c

@@ -32,8 +32,6 @@ struct gb_hid {
 	char				*inbuf;
 };
 
-static DEFINE_MUTEX(gb_hid_open_mutex);
-
 /* Routines to get controller's information over greybus */
 
 /* Operations performed on greybus */
@@ -346,19 +344,14 @@ static void gb_hid_stop(struct hid_device *hid)
 static int gb_hid_open(struct hid_device *hid)
 {
 	struct gb_hid *ghid = hid->driver_data;
-	int ret = 0;
+	int ret;
 
-	mutex_lock(&gb_hid_open_mutex);
-	if (!hid->open++) {
-		ret = gb_hid_set_power(ghid, GB_HID_TYPE_PWR_ON);
-		if (ret < 0)
-			hid->open--;
-		else
-			set_bit(GB_HID_STARTED, &ghid->flags);
-	}
-	mutex_unlock(&gb_hid_open_mutex);
+	ret = gb_hid_set_power(ghid, GB_HID_TYPE_PWR_ON);
+	if (ret < 0)
+		return ret;
 
-	return ret;
+	set_bit(GB_HID_STARTED, &ghid->flags);
+	return 0;
 }
 
 static void gb_hid_close(struct hid_device *hid)
@@ -366,21 +359,13 @@ static void gb_hid_close(struct hid_device *hid)
 	struct gb_hid *ghid = hid->driver_data;
 	int ret;
 
-	/*
-	 * Protecting hid->open to make sure we don't restart data acquistion
-	 * due to a resumption we no longer care about..
-	 */
-	mutex_lock(&gb_hid_open_mutex);
-	if (!--hid->open) {
-		clear_bit(GB_HID_STARTED, &ghid->flags);
+	clear_bit(GB_HID_STARTED, &ghid->flags);
 
-		/* Save some power */
-		ret = gb_hid_set_power(ghid, GB_HID_TYPE_PWR_OFF);
-		if (ret)
-			dev_err(&ghid->connection->bundle->dev,
-				"failed to power off (%d)\n", ret);
-	}
-	mutex_unlock(&gb_hid_open_mutex);
+	/* Save some power */
+	ret = gb_hid_set_power(ghid, GB_HID_TYPE_PWR_OFF);
+	if (ret)
+		dev_err(&ghid->connection->bundle->dev,
+			"failed to power off (%d)\n", ret);
 }
 
 static int gb_hid_power(struct hid_device *hid, int lvl)

diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index e389009..a4e3ae8 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c

@@ -915,6 +915,8 @@ static int spinand_probe(struct spi_device *spi_nand)
 	chip->waitfunc	= spinand_wait;
 	chip->options	|= NAND_CACHEPRG;
 	chip->select_chip = spinand_select_chip;
+	chip->onfi_set_features = nand_onfi_get_set_features_notsupp;
+	chip->onfi_get_features = nand_onfi_get_set_features_notsupp;
 
 	mtd = nand_to_mtd(chip);
 

diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 3fdca2c..74e4975 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c

@@ -488,15 +488,13 @@ EXPORT_SYMBOL(iscsit_queue_rsp);
 
 void iscsit_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 {
-	bool scsi_cmd = (cmd->iscsi_opcode == ISCSI_OP_SCSI_CMD);
-
 	spin_lock_bh(&conn->cmd_lock);
 	if (!list_empty(&cmd->i_conn_node) &&
 	    !(cmd->se_cmd.transport_state & CMD_T_FABRIC_STOP))
 		list_del_init(&cmd->i_conn_node);
 	spin_unlock_bh(&conn->cmd_lock);
 
-	__iscsit_free_cmd(cmd, scsi_cmd, true);
+	__iscsit_free_cmd(cmd, true);
 }
 EXPORT_SYMBOL(iscsit_aborted_task);
 
@@ -1251,12 +1249,8 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	 * execution.  These exceptions are processed in CmdSN order using
 	 * iscsit_check_received_cmdsn() in iscsit_get_immediate_data() below.
 	 */
-	if (cmd->sense_reason) {
-		if (cmd->reject_reason)
-			return 0;
-
+	if (cmd->sense_reason)
 		return 1;
-	}
 	/*
 	 * Call directly into transport_generic_new_cmd() to perform
 	 * the backend memory allocation.

diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index 535a8e0..0dd4c45 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c

@@ -781,6 +781,7 @@ DEF_TPG_ATTRIB(default_erl);
 DEF_TPG_ATTRIB(t10_pi);
 DEF_TPG_ATTRIB(fabric_prot_type);
 DEF_TPG_ATTRIB(tpg_enabled_sendtargets);
+DEF_TPG_ATTRIB(login_keys_workaround);
 
 static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = {
 	&iscsi_tpg_attrib_attr_authentication,
@@ -796,6 +797,7 @@ static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = {
 	&iscsi_tpg_attrib_attr_t10_pi,
 	&iscsi_tpg_attrib_attr_fabric_prot_type,
 	&iscsi_tpg_attrib_attr_tpg_enabled_sendtargets,
+	&iscsi_tpg_attrib_attr_login_keys_workaround,
 	NULL,
 };
 

diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index 6f88b31..7a6751f 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c

@@ -655,28 +655,6 @@ static void iscsi_target_do_login_rx(struct work_struct *work)
 	iscsit_deaccess_np(np, tpg, tpg_np);
 }
 
-static void iscsi_target_do_cleanup(struct work_struct *work)
-{
-	struct iscsi_conn *conn = container_of(work,
-				struct iscsi_conn, login_cleanup_work.work);
-	struct sock *sk = conn->sock->sk;
-	struct iscsi_login *login = conn->login;
-	struct iscsi_np *np = login->np;
-	struct iscsi_portal_group *tpg = conn->tpg;
-	struct iscsi_tpg_np *tpg_np = conn->tpg_np;
-
-	pr_debug("Entering iscsi_target_do_cleanup\n");
-
-	cancel_delayed_work_sync(&conn->login_work);
-	conn->orig_state_change(sk);
-
-	iscsi_target_restore_sock_callbacks(conn);
-	iscsi_target_login_drop(conn, login);
-	iscsit_deaccess_np(np, tpg, tpg_np);
-
-	pr_debug("iscsi_target_do_cleanup done()\n");
-}
-
 static void iscsi_target_sk_state_change(struct sock *sk)
 {
 	struct iscsi_conn *conn;
@@ -886,7 +864,8 @@ static int iscsi_target_handle_csg_zero(
 			SENDER_TARGET,
 			login->rsp_buf,
 			&login->rsp_length,
-			conn->param_list);
+			conn->param_list,
+			conn->tpg->tpg_attrib.login_keys_workaround);
 	if (ret < 0)
 		return -1;
 
@@ -956,7 +935,8 @@ static int iscsi_target_handle_csg_one(struct iscsi_conn *conn, struct iscsi_log
 			SENDER_TARGET,
 			login->rsp_buf,
 			&login->rsp_length,
-			conn->param_list);
+			conn->param_list,
+			conn->tpg->tpg_attrib.login_keys_workaround);
 	if (ret < 0) {
 		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
 				ISCSI_LOGIN_STATUS_INIT_ERR);
@@ -1082,7 +1062,6 @@ int iscsi_target_locate_portal(
 	int sessiontype = 0, ret = 0, tag_num, tag_size;
 
 	INIT_DELAYED_WORK(&conn->login_work, iscsi_target_do_login_rx);
-	INIT_DELAYED_WORK(&conn->login_cleanup_work, iscsi_target_do_cleanup);
 	iscsi_target_set_sock_callbacks(conn);
 
 	login->np = np;
@@ -1331,7 +1310,6 @@ int iscsi_target_start_negotiation(
 
 	if (ret < 0) {
 		cancel_delayed_work_sync(&conn->login_work);
-		cancel_delayed_work_sync(&conn->login_cleanup_work);
 		iscsi_target_restore_sock_callbacks(conn);
 		iscsi_remove_failed_auth_entry(conn);
 	}

diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
index fce6276..caab104 100644
--- a/drivers/target/iscsi/iscsi_target_parameters.c
+++ b/drivers/target/iscsi/iscsi_target_parameters.c

@@ -765,7 +765,8 @@ static int iscsi_check_for_auth_key(char *key)
 	return 0;
 }
 
-static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param)
+static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param,
+						    bool keys_workaround)
 {
 	if (IS_TYPE_BOOL_AND(param)) {
 		if (!strcmp(param->value, NO))
@@ -773,19 +774,31 @@ static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param)
 	} else if (IS_TYPE_BOOL_OR(param)) {
 		if (!strcmp(param->value, YES))
 			SET_PSTATE_REPLY_OPTIONAL(param);
-		 /*
-		  * Required for gPXE iSCSI boot client
-		  */
-		if (!strcmp(param->name, IMMEDIATEDATA))
-			SET_PSTATE_REPLY_OPTIONAL(param);
+
+		if (keys_workaround) {
+			/*
+			 * Required for gPXE iSCSI boot client
+			 */
+			if (!strcmp(param->name, IMMEDIATEDATA))
+				SET_PSTATE_REPLY_OPTIONAL(param);
+		}
 	} else if (IS_TYPE_NUMBER(param)) {
 		if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH))
 			SET_PSTATE_REPLY_OPTIONAL(param);
-		/*
-		 * Required for gPXE iSCSI boot client
-		 */
-		if (!strcmp(param->name, MAXCONNECTIONS))
-			SET_PSTATE_REPLY_OPTIONAL(param);
+
+		if (keys_workaround) {
+			/*
+			 * Required for Mellanox Flexboot PXE boot ROM
+			 */
+			if (!strcmp(param->name, FIRSTBURSTLENGTH))
+				SET_PSTATE_REPLY_OPTIONAL(param);
+
+			/*
+			 * Required for gPXE iSCSI boot client
+			 */
+			if (!strcmp(param->name, MAXCONNECTIONS))
+				SET_PSTATE_REPLY_OPTIONAL(param);
+		}
 	} else if (IS_PHASE_DECLARATIVE(param))
 		SET_PSTATE_REPLY_OPTIONAL(param);
 }
@@ -1422,7 +1435,8 @@ int iscsi_encode_text_output(
 	u8 sender,
 	char *textbuf,
 	u32 *length,
-	struct iscsi_param_list *param_list)
+	struct iscsi_param_list *param_list,
+	bool keys_workaround)
 {
 	char *output_buf = NULL;
 	struct iscsi_extra_response *er;
@@ -1458,7 +1472,8 @@ int iscsi_encode_text_output(
 			*length += 1;
 			output_buf = textbuf + *length;
 			SET_PSTATE_PROPOSER(param);
-			iscsi_check_proposer_for_optional_reply(param);
+			iscsi_check_proposer_for_optional_reply(param,
+							        keys_workaround);
 			pr_debug("Sending key: %s=%s\n",
 				param->name, param->value);
 		}

diff --git a/drivers/target/iscsi/iscsi_target_parameters.h b/drivers/target/iscsi/iscsi_target_parameters.h
index 9962ccf..c47b73f 100644
--- a/drivers/target/iscsi/iscsi_target_parameters.h
+++ b/drivers/target/iscsi/iscsi_target_parameters.h

@@ -46,7 +46,7 @@ extern int iscsi_extract_key_value(char *, char **, char **);
 extern int iscsi_update_param_value(struct iscsi_param *, char *);
 extern int iscsi_decode_text_input(u8, u8, char *, u32, struct iscsi_conn *);
 extern int iscsi_encode_text_output(u8, u8, char *, u32 *,
-			struct iscsi_param_list *);
+			struct iscsi_param_list *, bool);
 extern int iscsi_check_negotiated_keys(struct iscsi_param_list *);
 extern void iscsi_set_connection_parameters(struct iscsi_conn_ops *,
 			struct iscsi_param_list *);

diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c
index 2e7e08d..594d07a 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.c
+++ b/drivers/target/iscsi/iscsi_target_tpg.c

@@ -227,6 +227,7 @@ static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *tpg)
 	a->t10_pi = TA_DEFAULT_T10_PI;
 	a->fabric_prot_type = TA_DEFAULT_FABRIC_PROT_TYPE;
 	a->tpg_enabled_sendtargets = TA_DEFAULT_TPG_ENABLED_SENDTARGETS;
+	a->login_keys_workaround = TA_DEFAULT_LOGIN_KEYS_WORKAROUND;
 }
 
 int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg)
@@ -311,11 +312,9 @@ int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *tpg)
 	struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
 	int ret;
 
-	spin_lock(&tpg->tpg_state_lock);
 	if (tpg->tpg_state == TPG_STATE_ACTIVE) {
 		pr_err("iSCSI target portal group: %hu is already"
 			" active, ignoring request.\n", tpg->tpgt);
-		spin_unlock(&tpg->tpg_state_lock);
 		return -EINVAL;
 	}
 	/*
@@ -324,10 +323,8 @@ int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *tpg)
 	 * is enforced (as per default), and remove the NONE option.
 	 */
 	param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list);
-	if (!param) {
-		spin_unlock(&tpg->tpg_state_lock);
+	if (!param)
 		return -EINVAL;
-	}
 
 	if (tpg->tpg_attrib.authentication) {
 		if (!strcmp(param->value, NONE)) {
@@ -341,6 +338,7 @@ int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *tpg)
 			goto err;
 	}
 
+	spin_lock(&tpg->tpg_state_lock);
 	tpg->tpg_state = TPG_STATE_ACTIVE;
 	spin_unlock(&tpg->tpg_state_lock);
 
@@ -353,7 +351,6 @@ int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *tpg)
 	return 0;
 
 err:
-	spin_unlock(&tpg->tpg_state_lock);
 	return ret;
 }
 
@@ -899,3 +896,21 @@ int iscsit_ta_tpg_enabled_sendtargets(
 
 	return 0;
 }
+
+/* Set tpg's login_keys_workaround attribute (0 or 1); -EINVAL otherwise. */
+int iscsit_ta_login_keys_workaround(struct iscsi_portal_group *tpg,
+				    u32 flag)
+{
+	struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+	if ((flag != 0) && (flag != 1)) {
+		pr_err("Illegal value %u\n", flag);
+		return -EINVAL;
+	}
+
+	a->login_keys_workaround = flag;
+	pr_debug("iSCSI_TPG[%hu] - TPG enabled bit for login keys workaround: %s\n",
+		tpg->tpgt, (a->login_keys_workaround) ? "ON" : "OFF");
+
+	return 0;
+}

diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h
index ceba298..59fd3ca 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.h
+++ b/drivers/target/iscsi/iscsi_target_tpg.h

@@ -48,5 +48,6 @@ extern int iscsit_ta_default_erl(struct iscsi_portal_group *, u32);
 extern int iscsit_ta_t10_pi(struct iscsi_portal_group *, u32);
 extern int iscsit_ta_fabric_prot_type(struct iscsi_portal_group *, u32);
 extern int iscsit_ta_tpg_enabled_sendtargets(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_login_keys_workaround(struct iscsi_portal_group *, u32);
 
 #endif /* ISCSI_TARGET_TPG_H */

diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 7d3e2fc..1e36f83 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c

@@ -167,6 +167,7 @@ struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *conn, int state)
 
 	cmd->se_cmd.map_tag = tag;
 	cmd->conn = conn;
+	cmd->data_direction = DMA_NONE;
 	INIT_LIST_HEAD(&cmd->i_conn_node);
 	INIT_LIST_HEAD(&cmd->datain_list);
 	INIT_LIST_HEAD(&cmd->cmd_r2t_list);
@@ -711,19 +712,16 @@ void iscsit_release_cmd(struct iscsi_cmd *cmd)
 }
 EXPORT_SYMBOL(iscsit_release_cmd);
 
-void __iscsit_free_cmd(struct iscsi_cmd *cmd, bool scsi_cmd,
-		       bool check_queues)
+void __iscsit_free_cmd(struct iscsi_cmd *cmd, bool check_queues)
 {
 	struct iscsi_conn *conn = cmd->conn;
 
-	if (scsi_cmd) {
-		if (cmd->data_direction == DMA_TO_DEVICE) {
-			iscsit_stop_dataout_timer(cmd);
-			iscsit_free_r2ts_from_list(cmd);
-		}
-		if (cmd->data_direction == DMA_FROM_DEVICE)
-			iscsit_free_all_datain_reqs(cmd);
+	if (cmd->data_direction == DMA_TO_DEVICE) {
+		iscsit_stop_dataout_timer(cmd);
+		iscsit_free_r2ts_from_list(cmd);
 	}
+	if (cmd->data_direction == DMA_FROM_DEVICE)
+		iscsit_free_all_datain_reqs(cmd);
 
 	if (conn && check_queues) {
 		iscsit_remove_cmd_from_immediate_queue(cmd, conn);
@@ -736,50 +734,18 @@ void __iscsit_free_cmd(struct iscsi_cmd *cmd, bool scsi_cmd,
 
 void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown)
 {
-	struct se_cmd *se_cmd = NULL;
+	struct se_cmd *se_cmd = cmd->se_cmd.se_tfo ? &cmd->se_cmd : NULL;
 	int rc;
-	bool op_scsi = false;
-	/*
-	 * Determine if a struct se_cmd is associated with
-	 * this struct iscsi_cmd.
-	 */
-	switch (cmd->iscsi_opcode) {
-	case ISCSI_OP_SCSI_CMD:
-		op_scsi = true;
-		/*
-		 * Fallthrough
-		 */
-	case ISCSI_OP_SCSI_TMFUNC:
-		se_cmd = &cmd->se_cmd;
-		__iscsit_free_cmd(cmd, op_scsi, shutdown);
+
+	__iscsit_free_cmd(cmd, shutdown);
+	if (se_cmd) {
 		rc = transport_generic_free_cmd(se_cmd, shutdown);
 		if (!rc && shutdown && se_cmd->se_sess) {
-			__iscsit_free_cmd(cmd, op_scsi, shutdown);
+			__iscsit_free_cmd(cmd, shutdown);
 			target_put_sess_cmd(se_cmd);
 		}
-		break;
-	case ISCSI_OP_REJECT:
-		/*
-		 * Handle special case for REJECT when iscsi_add_reject*() has
-		 * overwritten the original iscsi_opcode assignment, and the
-		 * associated cmd->se_cmd needs to be released.
-		 */
-		if (cmd->se_cmd.se_tfo != NULL) {
-			se_cmd = &cmd->se_cmd;
-			__iscsit_free_cmd(cmd, true, shutdown);
-
-			rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown);
-			if (!rc && shutdown && se_cmd->se_sess) {
-				__iscsit_free_cmd(cmd, true, shutdown);
-				target_put_sess_cmd(se_cmd);
-			}
-			break;
-		}
-		/* Fall-through */
-	default:
-		__iscsit_free_cmd(cmd, false, shutdown);
+	} else {
 		iscsit_release_cmd(cmd);
-		break;
 	}
 }
 EXPORT_SYMBOL(iscsit_free_cmd);

diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h
index 9e4197a..4251605 100644
--- a/drivers/target/iscsi/iscsi_target_util.h
+++ b/drivers/target/iscsi/iscsi_target_util.h

@@ -37,7 +37,7 @@ extern void iscsit_remove_cmd_from_tx_queues(struct iscsi_cmd *, struct iscsi_co
 extern bool iscsit_conn_all_queues_empty(struct iscsi_conn *);
 extern void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *);
 extern void iscsit_release_cmd(struct iscsi_cmd *);
-extern void __iscsit_free_cmd(struct iscsi_cmd *, bool, bool);
+extern void __iscsit_free_cmd(struct iscsi_cmd *, bool);
 extern void iscsit_free_cmd(struct iscsi_cmd *, bool);
 extern int iscsit_check_session_usage_count(struct iscsi_session *);
 extern void iscsit_dec_session_usage_count(struct iscsi_session *);

diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 5091b31..b6a913e 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c

@@ -51,19 +51,7 @@ static int tcm_loop_queue_status(struct se_cmd *se_cmd);
  */
 static int tcm_loop_check_stop_free(struct se_cmd *se_cmd)
 {
-	/*
-	 * Do not release struct se_cmd's containing a valid TMR
-	 * pointer.  These will be released directly in tcm_loop_device_reset()
-	 * with transport_generic_free_cmd().
-	 */
-	if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
-		return 0;
-	/*
-	 * Release the struct se_cmd, which will make a callback to release
-	 * struct tcm_loop_cmd * in tcm_loop_deallocate_core_cmd()
-	 */
-	transport_generic_free_cmd(se_cmd, 0);
-	return 1;
+	return transport_generic_free_cmd(se_cmd, 0);
 }
 
 static void tcm_loop_release_cmd(struct se_cmd *se_cmd)
@@ -218,10 +206,8 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 {
 	struct se_cmd *se_cmd = NULL;
 	struct se_session *se_sess;
-	struct se_portal_group *se_tpg;
 	struct tcm_loop_nexus *tl_nexus;
 	struct tcm_loop_cmd *tl_cmd = NULL;
-	struct tcm_loop_tmr *tl_tmr = NULL;
 	int ret = TMR_FUNCTION_FAILED, rc;
 
 	/*
@@ -240,55 +226,29 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 		return ret;
 	}
 
-	tl_tmr = kzalloc(sizeof(struct tcm_loop_tmr), GFP_KERNEL);
-	if (!tl_tmr) {
-		pr_err("Unable to allocate memory for tl_tmr\n");
-		goto release;
-	}
-	init_waitqueue_head(&tl_tmr->tl_tmr_wait);
+	init_completion(&tl_cmd->tmr_done);
 
 	se_cmd = &tl_cmd->tl_se_cmd;
-	se_tpg = &tl_tpg->tl_se_tpg;
 	se_sess = tl_tpg->tl_nexus->se_sess;
-	/*
-	 * Initialize struct se_cmd descriptor from target_core_mod infrastructure
-	 */
-	transport_init_se_cmd(se_cmd, se_tpg->se_tpg_tfo, se_sess, 0,
-				DMA_NONE, TCM_SIMPLE_TAG,
-				&tl_cmd->tl_sense_buf[0]);
 
-	rc = core_tmr_alloc_req(se_cmd, tl_tmr, tmr, GFP_KERNEL);
+	rc = target_submit_tmr(se_cmd, se_sess, tl_cmd->tl_sense_buf, lun,
+			       NULL, tmr, GFP_KERNEL, task,
+			       TARGET_SCF_ACK_KREF);
 	if (rc < 0)
 		goto release;
-
-	if (tmr == TMR_ABORT_TASK)
-		se_cmd->se_tmr_req->ref_task_tag = task;
-
-	/*
-	 * Locate the underlying TCM struct se_lun
-	 */
-	if (transport_lookup_tmr_lun(se_cmd, lun) < 0) {
-		ret = TMR_LUN_DOES_NOT_EXIST;
-		goto release;
-	}
-	/*
-	 * Queue the TMR to TCM Core and sleep waiting for
-	 * tcm_loop_queue_tm_rsp() to wake us up.
-	 */
-	transport_generic_handle_tmr(se_cmd);
-	wait_event(tl_tmr->tl_tmr_wait, atomic_read(&tl_tmr->tmr_complete));
-	/*
-	 * The TMR LUN_RESET has completed, check the response status and
-	 * then release allocations.
-	 */
+	wait_for_completion(&tl_cmd->tmr_done);
 	ret = se_cmd->se_tmr_req->response;
+	target_put_sess_cmd(se_cmd);
+
+out:
+	return ret;
+
 release:
 	if (se_cmd)
-		transport_generic_free_cmd(se_cmd, 1);
+		transport_generic_free_cmd(se_cmd, 0);
 	else
 		kmem_cache_free(tcm_loop_cmd_cache, tl_cmd);
-	kfree(tl_tmr);
-	return ret;
+	goto out;
 }
 
 static int tcm_loop_abort_task(struct scsi_cmnd *sc)
@@ -669,14 +629,11 @@ static int tcm_loop_queue_status(struct se_cmd *se_cmd)
 
 static void tcm_loop_queue_tm_rsp(struct se_cmd *se_cmd)
 {
-	struct se_tmr_req *se_tmr = se_cmd->se_tmr_req;
-	struct tcm_loop_tmr *tl_tmr = se_tmr->fabric_tmr_ptr;
-	/*
-	 * The SCSI EH thread will be sleeping on se_tmr->tl_tmr_wait, go ahead
-	 * and wake up the wait_queue_head_t in tcm_loop_device_reset()
-	 */
-	atomic_set(&tl_tmr->tmr_complete, 1);
-	wake_up(&tl_tmr->tl_tmr_wait);
+	struct tcm_loop_cmd *tl_cmd = container_of(se_cmd,
+				struct tcm_loop_cmd, tl_se_cmd);
+
+	/* Wake up tcm_loop_issue_tmr(). */
+	complete(&tl_cmd->tmr_done);
 }
 
 static void tcm_loop_aborted_task(struct se_cmd *se_cmd)

diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h
index a8a230b..3acc43c 100644
--- a/drivers/target/loopback/tcm_loop.h
+++ b/drivers/target/loopback/tcm_loop.h

@@ -16,15 +16,11 @@ struct tcm_loop_cmd {
 	/* The TCM I/O descriptor that is accessed via container_of() */
 	struct se_cmd tl_se_cmd;
 	struct work_struct work;
+	struct completion tmr_done;
 	/* Sense buffer that will be mapped into outgoing status */
 	unsigned char tl_sense_buf[TRANSPORT_SENSE_BUFFER];
 };
 
-struct tcm_loop_tmr {
-	atomic_t tmr_complete;
-	wait_queue_head_t tl_tmr_wait;
-};
-
 struct tcm_loop_nexus {
 	/*
 	 * Pointer to TCM session for I_T Nexus

diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index fc4a9c3..a91b7c2 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c

@@ -205,8 +205,8 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd)
 		/*
 		 * TARGET PORT GROUP
 		 */
-		buf[off++] = ((tg_pt_gp->tg_pt_gp_id >> 8) & 0xff);
-		buf[off++] = (tg_pt_gp->tg_pt_gp_id & 0xff);
+		put_unaligned_be16(tg_pt_gp->tg_pt_gp_id, &buf[off]);
+		off += 2;
 
 		off++; /* Skip over Reserved */
 		/*
@@ -235,8 +235,8 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd)
 			/*
 			 * Set RELATIVE TARGET PORT IDENTIFIER
 			 */
-			buf[off++] = ((lun->lun_rtpi >> 8) & 0xff);
-			buf[off++] = (lun->lun_rtpi & 0xff);
+			put_unaligned_be16(lun->lun_rtpi, &buf[off]);
+			off += 2;
 			rd_len += 4;
 		}
 		spin_unlock(&tg_pt_gp->tg_pt_gp_lock);

diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 0326607..7e87d95 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c

@@ -1085,6 +1085,24 @@ static ssize_t block_size_store(struct config_item *item,
 	return count;
 }
 
+static ssize_t alua_support_show(struct config_item *item, char *page)
+{
+	struct se_dev_attrib *da = to_attrib(item);
+	u8 flags = da->da_dev->transport->transport_flags;
+
+	return snprintf(page, PAGE_SIZE, "%d\n",
+			flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA ? 0 : 1);
+}
+
+static ssize_t pgr_support_show(struct config_item *item, char *page)
+{
+	struct se_dev_attrib *da = to_attrib(item);
+	u8 flags = da->da_dev->transport->transport_flags;
+
+	return snprintf(page, PAGE_SIZE, "%d\n",
+			flags & TRANSPORT_FLAG_PASSTHROUGH_PGR ? 0 : 1);
+}
+
 CONFIGFS_ATTR(, emulate_model_alias);
 CONFIGFS_ATTR(, emulate_dpo);
 CONFIGFS_ATTR(, emulate_fua_write);
@@ -1116,6 +1134,8 @@ CONFIGFS_ATTR(, unmap_granularity);
 CONFIGFS_ATTR(, unmap_granularity_alignment);
 CONFIGFS_ATTR(, unmap_zeroes_data);
 CONFIGFS_ATTR(, max_write_same_len);
+CONFIGFS_ATTR_RO(, alua_support);
+CONFIGFS_ATTR_RO(, pgr_support);
 
 /*
  * dev_attrib attributes for devices using the target core SBC/SPC
@@ -1154,6 +1174,8 @@ struct configfs_attribute *sbc_attrib_attrs[] = {
 	&attr_unmap_granularity_alignment,
 	&attr_unmap_zeroes_data,
 	&attr_max_write_same_len,
+	&attr_alua_support,
+	&attr_pgr_support,
 	NULL,
 };
 EXPORT_SYMBOL(sbc_attrib_attrs);
@@ -1168,6 +1190,8 @@ struct configfs_attribute *passthrough_attrib_attrs[] = {
 	&attr_hw_block_size,
 	&attr_hw_max_sectors,
 	&attr_hw_queue_depth,
+	&attr_alua_support,
+	&attr_pgr_support,
 	NULL,
 };
 EXPORT_SYMBOL(passthrough_attrib_attrs);
@@ -2236,7 +2260,11 @@ static void target_core_dev_release(struct config_item *item)
 	target_free_device(dev);
 }
 
-static struct configfs_item_operations target_core_dev_item_ops = {
+/*
+ * Used in target_core_fabric_configfs.c to verify valid se_device symlink
+ * within target_fabric_port_link()
+ */
+struct configfs_item_operations target_core_dev_item_ops = {
 	.release		= target_core_dev_release,
 };
 

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 8add07f38..e8dd6da 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c

@@ -49,8 +49,9 @@
 #include "target_core_pr.h"
 #include "target_core_ua.h"
 
-DEFINE_MUTEX(g_device_mutex);
-LIST_HEAD(g_device_list);
+static DEFINE_MUTEX(device_mutex);
+static LIST_HEAD(device_list);
+static DEFINE_IDR(devices_idr);
 
 static struct se_hba *lun0_hba;
 /* not static, needed by tpg.c */
@@ -168,11 +169,20 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u64 unpacked_lun)
 	rcu_read_lock();
 	deve = target_nacl_find_deve(nacl, unpacked_lun);
 	if (deve) {
-		se_cmd->se_lun = rcu_dereference(deve->se_lun);
 		se_lun = rcu_dereference(deve->se_lun);
+
+		if (!percpu_ref_tryget_live(&se_lun->lun_ref)) {
+			se_lun = NULL;
+			goto out_unlock;
+		}
+
+		se_cmd->se_lun = rcu_dereference(deve->se_lun);
 		se_cmd->pr_res_key = deve->pr_res_key;
 		se_cmd->orig_fe_lun = unpacked_lun;
+		se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD;
+		se_cmd->lun_ref_active = true;
 	}
+out_unlock:
 	rcu_read_unlock();
 
 	if (!se_lun) {
@@ -182,9 +192,6 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u64 unpacked_lun)
 			unpacked_lun);
 		return -ENODEV;
 	}
-	/*
-	 * XXX: Add percpu se_lun->lun_ref reference count for TMR
-	 */
 	se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev);
 	se_tmr->tmr_dev = rcu_dereference_raw(se_lun->lun_se_dev);
 
@@ -756,19 +763,16 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
 	if (!dev)
 		return NULL;
 
-	dev->dev_link_magic = SE_DEV_LINK_MAGIC;
 	dev->se_hba = hba;
 	dev->transport = hba->backend->ops;
 	dev->prot_length = sizeof(struct t10_pi_tuple);
 	dev->hba_index = hba->hba_index;
 
-	INIT_LIST_HEAD(&dev->dev_list);
 	INIT_LIST_HEAD(&dev->dev_sep_list);
 	INIT_LIST_HEAD(&dev->dev_tmr_list);
 	INIT_LIST_HEAD(&dev->delayed_cmd_list);
 	INIT_LIST_HEAD(&dev->state_list);
 	INIT_LIST_HEAD(&dev->qf_cmd_list);
-	INIT_LIST_HEAD(&dev->g_dev_node);
 	spin_lock_init(&dev->execute_task_lock);
 	spin_lock_init(&dev->delayed_cmd_lock);
 	spin_lock_init(&dev->dev_reservation_lock);
@@ -851,7 +855,7 @@ bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
 	attrib->unmap_granularity = q->limits.discard_granularity / block_size;
 	attrib->unmap_granularity_alignment = q->limits.discard_alignment /
 								block_size;
-	attrib->unmap_zeroes_data = 0;
+	attrib->unmap_zeroes_data = (q->limits.max_write_zeroes_sectors);
 	return true;
 }
 EXPORT_SYMBOL(target_configure_unmap_from_queue);
@@ -875,10 +879,79 @@ sector_t target_to_linux_sector(struct se_device *dev, sector_t lb)
 }
 EXPORT_SYMBOL(target_to_linux_sector);
 
+/**
+ * target_find_device - find a se_device by its dev_index
+ * @id: dev_index
+ * @do_depend: true if caller needs target_depend_item to be done
+ *
+ * If do_depend is true, the caller must do a target_undepend_item
+ * when finished using the device.
+ *
+ * If do_depend is false, the caller must be called in a configfs
+ * callback or during removal.
+ */
+struct se_device *target_find_device(int id, bool do_depend)
+{
+	struct se_device *dev;
+
+	mutex_lock(&device_mutex);
+	dev = idr_find(&devices_idr, id);
+	if (dev && do_depend && target_depend_item(&dev->dev_group.cg_item))
+		dev = NULL;
+	mutex_unlock(&device_mutex);
+	return dev;
+}
+EXPORT_SYMBOL(target_find_device);
+
+struct devices_idr_iter {
+	int (*fn)(struct se_device *dev, void *data);
+	void *data;
+};
+
+static int target_devices_idr_iter(int id, void *p, void *data)
+{
+	struct devices_idr_iter *iter = data;
+	struct se_device *dev = p;
+
+	/*
+	 * We add the device early to the idr, so it can be used
+	 * by backend modules during configuration. We do not want
+	 * to allow other callers to access partially setup devices,
+	 * so we skip them here.
+	 */
+	if (!(dev->dev_flags & DF_CONFIGURED))
+		return 0;
+
+	return iter->fn(dev, iter->data);
+}
+
+/**
+ * target_for_each_device - iterate over configured devices
+ * @fn: iterator function
+ * @data: pointer to data that will be passed to fn
+ *
+ * fn must return 0 to continue looping over devices. non-zero will break
+ * from the loop and return that value to the caller.
+ */
+int target_for_each_device(int (*fn)(struct se_device *dev, void *data),
+			   void *data)
+{
+	struct devices_idr_iter iter;
+	int ret;
+
+	iter.fn = fn;
+	iter.data = data;
+
+	mutex_lock(&device_mutex);
+	ret = idr_for_each(&devices_idr, target_devices_idr_iter, &iter);
+	mutex_unlock(&device_mutex);
+	return ret;
+}
+
 int target_configure_device(struct se_device *dev)
 {
 	struct se_hba *hba = dev->se_hba;
-	int ret;
+	int ret, id;
 
 	if (dev->dev_flags & DF_CONFIGURED) {
 		pr_err("se_dev->se_dev_ptr already set for storage"
@@ -886,9 +959,26 @@ int target_configure_device(struct se_device *dev)
 		return -EEXIST;
 	}
 
+	/*
+	 * Add early so modules like tcmu can use during its
+	 * configuration.
+	 */
+	mutex_lock(&device_mutex);
+	/*
+	 * Use cyclic to try and avoid collisions with devices
+	 * that were recently removed.
+	 */
+	id = idr_alloc_cyclic(&devices_idr, dev, 0, INT_MAX, GFP_KERNEL);
+	mutex_unlock(&device_mutex);
+	if (id < 0) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	dev->dev_index = id;
+
 	ret = dev->transport->configure_device(dev);
 	if (ret)
-		goto out;
+		goto out_free_index;
 	/*
 	 * XXX: there is not much point to have two different values here..
 	 */
@@ -903,12 +993,11 @@ int target_configure_device(struct se_device *dev)
 					 dev->dev_attrib.hw_block_size);
 	dev->dev_attrib.optimal_sectors = dev->dev_attrib.hw_max_sectors;
 
-	dev->dev_index = scsi_get_new_index(SCSI_DEVICE_INDEX);
 	dev->creation_time = get_jiffies_64();
 
 	ret = core_setup_alua(dev);
 	if (ret)
-		goto out;
+		goto out_free_index;
 
 	/*
 	 * Startup the struct se_device processing thread
@@ -946,16 +1035,16 @@ int target_configure_device(struct se_device *dev)
 	hba->dev_count++;
 	spin_unlock(&hba->device_lock);
 
-	mutex_lock(&g_device_mutex);
-	list_add_tail(&dev->g_dev_node, &g_device_list);
-	mutex_unlock(&g_device_mutex);
-
 	dev->dev_flags |= DF_CONFIGURED;
 
 	return 0;
 
 out_free_alua:
 	core_alua_free_lu_gp_mem(dev);
+out_free_index:
+	mutex_lock(&device_mutex);
+	idr_remove(&devices_idr, dev->dev_index);
+	mutex_unlock(&device_mutex);
 out:
 	se_release_vpd_for_dev(dev);
 	return ret;
@@ -970,9 +1059,11 @@ void target_free_device(struct se_device *dev)
 	if (dev->dev_flags & DF_CONFIGURED) {
 		destroy_workqueue(dev->tmr_wq);
 
-		mutex_lock(&g_device_mutex);
-		list_del(&dev->g_dev_node);
-		mutex_unlock(&g_device_mutex);
+		dev->transport->destroy_device(dev);
+
+		mutex_lock(&device_mutex);
+		idr_remove(&devices_idr, dev->dev_index);
+		mutex_unlock(&device_mutex);
 
 		spin_lock(&hba->device_lock);
 		hba->dev_count--;
@@ -1087,19 +1178,19 @@ passthrough_parse_cdb(struct se_cmd *cmd,
 	      TRANSPORT_FLAG_PASSTHROUGH_PGR)) {
 		if (cdb[0] == PERSISTENT_RESERVE_IN) {
 			cmd->execute_cmd = target_scsi3_emulate_pr_in;
-			size = (cdb[7] << 8) + cdb[8];
+			size = get_unaligned_be16(&cdb[7]);
 			return target_cmd_size_check(cmd, size);
 		}
 		if (cdb[0] == PERSISTENT_RESERVE_OUT) {
 			cmd->execute_cmd = target_scsi3_emulate_pr_out;
-			size = (cdb[7] << 8) + cdb[8];
+			size = get_unaligned_be32(&cdb[5]);
 			return target_cmd_size_check(cmd, size);
 		}
 
 		if (cdb[0] == RELEASE || cdb[0] == RELEASE_10) {
 			cmd->execute_cmd = target_scsi2_reservation_release;
 			if (cdb[0] == RELEASE_10)
-				size = (cdb[7] << 8) | cdb[8];
+				size = get_unaligned_be16(&cdb[7]);
 			else
 				size = cmd->data_length;
 			return target_cmd_size_check(cmd, size);
@@ -1107,7 +1198,7 @@ passthrough_parse_cdb(struct se_cmd *cmd,
 		if (cdb[0] == RESERVE || cdb[0] == RESERVE_10) {
 			cmd->execute_cmd = target_scsi2_reservation_reserve;
 			if (cdb[0] == RESERVE_10)
-				size = (cdb[7] << 8) | cdb[8];
+				size = get_unaligned_be16(&cdb[7]);
 			else
 				size = cmd->data_length;
 			return target_cmd_size_check(cmd, size);
@@ -1126,7 +1217,7 @@ passthrough_parse_cdb(struct se_cmd *cmd,
 	case WRITE_16:
 	case WRITE_VERIFY:
 	case WRITE_VERIFY_12:
-	case 0x8e: /* WRITE_VERIFY_16 */
+	case WRITE_VERIFY_16:
 	case COMPARE_AND_WRITE:
 	case XDWRITEREAD_10:
 		cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
@@ -1135,7 +1226,7 @@ passthrough_parse_cdb(struct se_cmd *cmd,
 		switch (get_unaligned_be16(&cdb[8])) {
 		case READ_32:
 		case WRITE_32:
-		case 0x0c: /* WRITE_VERIFY_32 */
+		case WRITE_VERIFY_32:
 		case XDWRITEREAD_32:
 			cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
 			break;

diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index d1e6cab..e9e917c 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c

@@ -65,6 +65,8 @@ static void target_fabric_setup_##_name##_cit(struct target_fabric_configfs *tf)
 	pr_debug("Setup generic %s\n", __stringify(_name));		\
 }
 
+static struct configfs_item_operations target_fabric_port_item_ops;
+
 /* Start of tfc_tpg_mappedlun_cit */
 
 static int target_fabric_mappedlun_link(
@@ -72,19 +74,20 @@ static int target_fabric_mappedlun_link(
 	struct config_item *lun_ci)
 {
 	struct se_dev_entry *deve;
-	struct se_lun *lun = container_of(to_config_group(lun_ci),
-			struct se_lun, lun_group);
+	struct se_lun *lun;
 	struct se_lun_acl *lacl = container_of(to_config_group(lun_acl_ci),
 			struct se_lun_acl, se_lun_group);
 	struct se_portal_group *se_tpg;
 	struct config_item *nacl_ci, *tpg_ci, *tpg_ci_s, *wwn_ci, *wwn_ci_s;
 	bool lun_access_ro;
 
-	if (lun->lun_link_magic != SE_LUN_LINK_MAGIC) {
-		pr_err("Bad lun->lun_link_magic, not a valid lun_ci pointer:"
-			" %p to struct lun: %p\n", lun_ci, lun);
+	if (!lun_ci->ci_type ||
+	    lun_ci->ci_type->ct_item_ops != &target_fabric_port_item_ops) {
+		pr_err("Bad lun_ci, not a valid lun_ci pointer: %p\n", lun_ci);
 		return -EFAULT;
 	}
+	lun = container_of(to_config_group(lun_ci), struct se_lun, lun_group);
+
 	/*
 	 * Ensure that the source port exists
 	 */
@@ -620,6 +623,8 @@ static struct configfs_attribute *target_fabric_port_attrs[] = {
 	NULL,
 };
 
+extern struct configfs_item_operations target_core_dev_item_ops;
+
 static int target_fabric_port_link(
 	struct config_item *lun_ci,
 	struct config_item *se_dev_ci)
@@ -628,16 +633,16 @@ static int target_fabric_port_link(
 	struct se_lun *lun = container_of(to_config_group(lun_ci),
 				struct se_lun, lun_group);
 	struct se_portal_group *se_tpg;
-	struct se_device *dev =
-		container_of(to_config_group(se_dev_ci), struct se_device, dev_group);
+	struct se_device *dev;
 	struct target_fabric_configfs *tf;
 	int ret;
 
-	if (dev->dev_link_magic != SE_DEV_LINK_MAGIC) {
-		pr_err("Bad dev->dev_link_magic, not a valid se_dev_ci pointer:"
-			" %p to struct se_device: %p\n", se_dev_ci, dev);
+	if (!se_dev_ci->ci_type ||
+	    se_dev_ci->ci_type->ct_item_ops != &target_core_dev_item_ops) {
+		pr_err("Bad se_dev_ci, not a valid se_dev_ci pointer: %p\n", se_dev_ci);
 		return -EFAULT;
 	}
+	dev = container_of(to_config_group(se_dev_ci), struct se_device, dev_group);
 
 	if (!(dev->dev_flags & DF_CONFIGURED)) {
 		pr_err("se_device not configured yet, cannot port link\n");

diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c
index cb6497c..508da34 100644
--- a/drivers/target/target_core_fabric_lib.c
+++ b/drivers/target/target_core_fabric_lib.c

@@ -34,6 +34,7 @@
 #include <linux/ctype.h>
 #include <linux/spinlock.h>
 #include <linux/export.h>
+#include <asm/unaligned.h>
 
 #include <scsi/scsi_proto.h>
 
@@ -216,8 +217,7 @@ static int iscsi_get_pr_transport_id(
 	if (padding != 0)
 		len += padding;
 
-	buf[2] = ((len >> 8) & 0xff);
-	buf[3] = (len & 0xff);
+	put_unaligned_be16(len, &buf[2]);
 	/*
 	 * Increment value for total payload + header length for
 	 * full status descriptor
@@ -306,7 +306,7 @@ static char *iscsi_parse_pr_out_transport_id(
 	 */
 	if (out_tid_len) {
 		/* The shift works thanks to integer promotion rules */
-		add_len = (buf[2] << 8) | buf[3];
+		add_len = get_unaligned_be16(&buf[2]);
 
 		tid_len = strlen(&buf[4]);
 		tid_len += 4; /* Add four bytes for iSCSI Transport ID header */

diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index e921948..24cf11d 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c

@@ -237,13 +237,17 @@ static void fd_dev_call_rcu(struct rcu_head *p)
 
 static void fd_free_device(struct se_device *dev)
 {
+	call_rcu(&dev->rcu_head, fd_dev_call_rcu);
+}
+
+static void fd_destroy_device(struct se_device *dev)
+{
 	struct fd_dev *fd_dev = FD_DEV(dev);
 
 	if (fd_dev->fd_file) {
 		filp_close(fd_dev->fd_file, NULL);
 		fd_dev->fd_file = NULL;
 	}
-	call_rcu(&dev->rcu_head, fd_dev_call_rcu);
 }
 
 static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
@@ -826,6 +830,7 @@ static const struct target_backend_ops fileio_ops = {
 	.detach_hba		= fd_detach_hba,
 	.alloc_device		= fd_alloc_device,
 	.configure_device	= fd_configure_device,
+	.destroy_device		= fd_destroy_device,
 	.free_device		= fd_free_device,
 	.parse_cdb		= fd_parse_cdb,
 	.set_configfs_dev_params = fd_set_configfs_dev_params,

diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index c05d3801..ee7c7fa 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c

@@ -86,6 +86,7 @@ static int iblock_configure_device(struct se_device *dev)
 	struct block_device *bd = NULL;
 	struct blk_integrity *bi;
 	fmode_t mode;
+	unsigned int max_write_zeroes_sectors;
 	int ret = -ENOMEM;
 
 	if (!(ib_dev->ibd_flags & IBDF_HAS_UDEV_PATH)) {
@@ -129,7 +130,11 @@ static int iblock_configure_device(struct se_device *dev)
 	 * Enable write same emulation for IBLOCK and use 0xFFFF as
 	 * the smaller WRITE_SAME(10) only has a two-byte block count.
 	 */
-	dev->dev_attrib.max_write_same_len = 0xFFFF;
+	max_write_zeroes_sectors = bdev_write_zeroes_sectors(bd);
+	if (max_write_zeroes_sectors)
+		dev->dev_attrib.max_write_same_len = max_write_zeroes_sectors;
+	else
+		dev->dev_attrib.max_write_same_len = 0xFFFF;
 
 	if (blk_queue_nonrot(q))
 		dev->dev_attrib.is_nonrot = 1;
@@ -185,14 +190,17 @@ static void iblock_dev_call_rcu(struct rcu_head *p)
 
 static void iblock_free_device(struct se_device *dev)
 {
+	call_rcu(&dev->rcu_head, iblock_dev_call_rcu);
+}
+
+static void iblock_destroy_device(struct se_device *dev)
+{
 	struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
 
 	if (ib_dev->ibd_bd != NULL)
 		blkdev_put(ib_dev->ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL);
 	if (ib_dev->ibd_bio_set != NULL)
 		bioset_free(ib_dev->ibd_bio_set);
-
-	call_rcu(&dev->rcu_head, iblock_dev_call_rcu);
 }
 
 static unsigned long long iblock_emulate_read_cap_with_block_size(
@@ -415,28 +423,31 @@ iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
 }
 
 static sense_reason_t
-iblock_execute_write_same_direct(struct block_device *bdev, struct se_cmd *cmd)
+iblock_execute_zero_out(struct block_device *bdev, struct se_cmd *cmd)
 {
 	struct se_device *dev = cmd->se_dev;
 	struct scatterlist *sg = &cmd->t_data_sg[0];
-	struct page *page = NULL;
-	int ret;
+	unsigned char *buf, *not_zero;
+	int ret;
 
-	if (sg->offset) {
-		page = alloc_page(GFP_KERNEL);
-		if (!page)
-			return TCM_OUT_OF_RESOURCES;
-		sg_copy_to_buffer(sg, cmd->t_data_nents, page_address(page),
-				  dev->dev_attrib.block_size);
-	}
+	buf = kmap(sg_page(sg)) + sg->offset;
+	if (!buf)
+		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+	/*
+	 * Scan the whole payload for a non-zero byte. If one is found,
+	 * return an error so the caller falls back to its slow-path.
+	 */
+	not_zero = memchr_inv(buf, 0x00, cmd->data_length);
+	kunmap(sg_page(sg));
 
-	ret = blkdev_issue_write_same(bdev,
+	if (not_zero)
+		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+
+	ret = blkdev_issue_zeroout(bdev,
 				target_to_linux_sector(dev, cmd->t_task_lba),
 				target_to_linux_sector(dev,
 					sbc_get_write_same_sectors(cmd)),
-				GFP_KERNEL, page ? page : sg_page(sg));
-	if (page)
-		__free_page(page);
+				GFP_KERNEL, false);
 	if (ret)
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
@@ -472,8 +483,10 @@ iblock_execute_write_same(struct se_cmd *cmd)
 		return TCM_INVALID_CDB_FIELD;
 	}
 
-	if (bdev_write_same(bdev))
-		return iblock_execute_write_same_direct(bdev, cmd);
+	if (bdev_write_zeroes_sectors(bdev)) {
+		if (!iblock_execute_zero_out(bdev, cmd))
+			return 0;
+	}
 
 	ibr = kzalloc(sizeof(struct iblock_req), GFP_KERNEL);
 	if (!ibr)
@@ -848,6 +861,7 @@ static const struct target_backend_ops iblock_ops = {
 	.detach_hba		= iblock_detach_hba,
 	.alloc_device		= iblock_alloc_device,
 	.configure_device	= iblock_configure_device,
+	.destroy_device		= iblock_destroy_device,
 	.free_device		= iblock_free_device,
 	.parse_cdb		= iblock_parse_cdb,
 	.set_configfs_dev_params = iblock_set_configfs_dev_params,

diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index 0912de7..f30e8ac 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h

@@ -56,9 +56,6 @@ struct target_fabric_configfs {
 extern struct t10_alua_lu_gp *default_lu_gp;
 
 /* target_core_device.c */
-extern struct mutex g_device_mutex;
-extern struct list_head g_device_list;
-
 int	core_alloc_rtpi(struct se_lun *lun, struct se_device *dev);
 struct se_dev_entry *core_get_se_deve_from_rtpi(struct se_node_acl *, u16);
 void	target_pr_kref_release(struct kref *);
@@ -87,6 +84,8 @@ void	core_dev_release_virtual_lun0(void);
 struct se_device *target_alloc_device(struct se_hba *hba, const char *name);
 int	target_configure_device(struct se_device *dev);
 void	target_free_device(struct se_device *);
+int	target_for_each_device(int (*fn)(struct se_device *dev, void *data),
+			       void *data);
 
 /* target_core_configfs.c */
 void	target_setup_backend_cits(struct target_backend *);

diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 129ca57..6d5def6 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c

@@ -1562,10 +1562,7 @@ core_scsi3_decode_spec_i_port(
 	 * first extract TransportID Parameter Data Length, and make sure
 	 * the value matches up to the SCSI expected data transfer length.
 	 */
-	tpdl = (buf[24] & 0xff) << 24;
-	tpdl |= (buf[25] & 0xff) << 16;
-	tpdl |= (buf[26] & 0xff) << 8;
-	tpdl |= buf[27] & 0xff;
+	tpdl = get_unaligned_be32(&buf[24]);
 
 	if ((tpdl + 28) != cmd->data_length) {
 		pr_err("SPC-3 PR: Illegal tpdl: %u + 28 byte header"
@@ -3221,12 +3218,8 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key,
 		goto out_put_pr_reg;
 	}
 
-	rtpi = (buf[18] & 0xff) << 8;
-	rtpi |= buf[19] & 0xff;
-	tid_len = (buf[20] & 0xff) << 24;
-	tid_len |= (buf[21] & 0xff) << 16;
-	tid_len |= (buf[22] & 0xff) << 8;
-	tid_len |= buf[23] & 0xff;
+	rtpi = get_unaligned_be16(&buf[18]);
+	tid_len = get_unaligned_be32(&buf[20]);
 	transport_kunmap_data_sg(cmd);
 	buf = NULL;
 
@@ -3552,16 +3545,6 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key,
 	return ret;
 }
 
-static unsigned long long core_scsi3_extract_reservation_key(unsigned char *cdb)
-{
-	unsigned int __v1, __v2;
-
-	__v1 = (cdb[0] << 24) | (cdb[1] << 16) | (cdb[2] << 8) | cdb[3];
-	__v2 = (cdb[4] << 24) | (cdb[5] << 16) | (cdb[6] << 8) | cdb[7];
-
-	return ((unsigned long long)__v2) | (unsigned long long)__v1 << 32;
-}
-
 /*
  * See spc4r17 section 6.14 Table 170
  */
@@ -3602,7 +3585,7 @@ target_scsi3_emulate_pr_out(struct se_cmd *cmd)
 	if (cmd->data_length < 24) {
 		pr_warn("SPC-PR: Received PR OUT parameter list"
 			" length too small: %u\n", cmd->data_length);
-		return TCM_INVALID_PARAMETER_LIST;
+		return TCM_PARAMETER_LIST_LENGTH_ERROR;
 	}
 
 	/*
@@ -3619,8 +3602,8 @@ target_scsi3_emulate_pr_out(struct se_cmd *cmd)
 	/*
 	 * From PERSISTENT_RESERVE_OUT parameter list (payload)
 	 */
-	res_key = core_scsi3_extract_reservation_key(&buf[0]);
-	sa_res_key = core_scsi3_extract_reservation_key(&buf[8]);
+	res_key = get_unaligned_be64(&buf[0]);
+	sa_res_key = get_unaligned_be64(&buf[8]);
 	/*
 	 * REGISTER_AND_MOVE uses a different SA parameter list containing
 	 * SCSI TransportIDs.
@@ -3646,7 +3629,7 @@ target_scsi3_emulate_pr_out(struct se_cmd *cmd)
 	/*
 	 * SPEC_I_PT=1 is only valid for Service action: REGISTER
 	 */
-	if (spec_i_pt && ((cdb[1] & 0x1f) != PRO_REGISTER))
+	if (spec_i_pt && (sa != PRO_REGISTER))
 		return TCM_INVALID_PARAMETER_LIST;
 
 	/*
@@ -3658,11 +3641,11 @@ target_scsi3_emulate_pr_out(struct se_cmd *cmd)
 	 * the sense key set to ILLEGAL REQUEST, and the additional sense
 	 * code set to PARAMETER LIST LENGTH ERROR.
 	 */
-	if (!spec_i_pt && ((cdb[1] & 0x1f) != PRO_REGISTER_AND_MOVE) &&
+	if (!spec_i_pt && (sa != PRO_REGISTER_AND_MOVE) &&
 	    (cmd->data_length != 24)) {
 		pr_warn("SPC-PR: Received PR OUT illegal parameter"
 			" list length: %u\n", cmd->data_length);
-		return TCM_INVALID_PARAMETER_LIST;
+		return TCM_PARAMETER_LIST_LENGTH_ERROR;
 	}
 
 	/*
@@ -3702,7 +3685,7 @@ target_scsi3_emulate_pr_out(struct se_cmd *cmd)
 		break;
 	default:
 		pr_err("Unknown PERSISTENT_RESERVE_OUT service"
-			" action: 0x%02x\n", cdb[1] & 0x1f);
+			" action: 0x%02x\n", sa);
 		return TCM_INVALID_CDB_FIELD;
 	}
 
@@ -3734,10 +3717,7 @@ core_scsi3_pri_read_keys(struct se_cmd *cmd)
 	if (!buf)
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
-	buf[0] = ((dev->t10_pr.pr_generation >> 24) & 0xff);
-	buf[1] = ((dev->t10_pr.pr_generation >> 16) & 0xff);
-	buf[2] = ((dev->t10_pr.pr_generation >> 8) & 0xff);
-	buf[3] = (dev->t10_pr.pr_generation & 0xff);
+	put_unaligned_be32(dev->t10_pr.pr_generation, buf);
 
 	spin_lock(&dev->t10_pr.registration_lock);
 	list_for_each_entry(pr_reg, &dev->t10_pr.registration_list,
@@ -3749,23 +3729,13 @@ core_scsi3_pri_read_keys(struct se_cmd *cmd)
 		if ((add_len + 8) > (cmd->data_length - 8))
 			break;
 
-		buf[off++] = ((pr_reg->pr_res_key >> 56) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 48) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 40) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 32) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 24) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 16) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 8) & 0xff);
-		buf[off++] = (pr_reg->pr_res_key & 0xff);
-
+		put_unaligned_be64(pr_reg->pr_res_key, &buf[off]);
+		off += 8;
 		add_len += 8;
 	}
 	spin_unlock(&dev->t10_pr.registration_lock);
 
-	buf[4] = ((add_len >> 24) & 0xff);
-	buf[5] = ((add_len >> 16) & 0xff);
-	buf[6] = ((add_len >> 8) & 0xff);
-	buf[7] = (add_len & 0xff);
+	put_unaligned_be32(add_len, &buf[4]);
 
 	transport_kunmap_data_sg(cmd);
 
@@ -3796,10 +3766,7 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd)
 	if (!buf)
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
-	buf[0] = ((dev->t10_pr.pr_generation >> 24) & 0xff);
-	buf[1] = ((dev->t10_pr.pr_generation >> 16) & 0xff);
-	buf[2] = ((dev->t10_pr.pr_generation >> 8) & 0xff);
-	buf[3] = (dev->t10_pr.pr_generation & 0xff);
+	put_unaligned_be32(dev->t10_pr.pr_generation, &buf[0]);
 
 	spin_lock(&dev->dev_reservation_lock);
 	pr_reg = dev->dev_pr_res_holder;
@@ -3807,10 +3774,7 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd)
 		/*
 		 * Set the hardcoded Additional Length
 		 */
-		buf[4] = ((add_len >> 24) & 0xff);
-		buf[5] = ((add_len >> 16) & 0xff);
-		buf[6] = ((add_len >> 8) & 0xff);
-		buf[7] = (add_len & 0xff);
+		put_unaligned_be32(add_len, &buf[4]);
 
 		if (cmd->data_length < 22)
 			goto err;
@@ -3837,14 +3801,7 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd)
 		else
 			pr_res_key = pr_reg->pr_res_key;
 
-		buf[8] = ((pr_res_key >> 56) & 0xff);
-		buf[9] = ((pr_res_key >> 48) & 0xff);
-		buf[10] = ((pr_res_key >> 40) & 0xff);
-		buf[11] = ((pr_res_key >> 32) & 0xff);
-		buf[12] = ((pr_res_key >> 24) & 0xff);
-		buf[13] = ((pr_res_key >> 16) & 0xff);
-		buf[14] = ((pr_res_key >> 8) & 0xff);
-		buf[15] = (pr_res_key & 0xff);
+		put_unaligned_be64(pr_res_key, &buf[8]);
 		/*
 		 * Set the SCOPE and TYPE
 		 */
@@ -3882,8 +3839,7 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd)
 	if (!buf)
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
-	buf[0] = ((add_len >> 8) & 0xff);
-	buf[1] = (add_len & 0xff);
+	put_unaligned_be16(add_len, &buf[0]);
 	buf[2] |= 0x10; /* CRH: Compatible Reservation Hanlding bit. */
 	buf[2] |= 0x08; /* SIP_C: Specify Initiator Ports Capable bit */
 	buf[2] |= 0x04; /* ATP_C: All Target Ports Capable bit */
@@ -3947,10 +3903,7 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 	if (!buf)
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
-	buf[0] = ((dev->t10_pr.pr_generation >> 24) & 0xff);
-	buf[1] = ((dev->t10_pr.pr_generation >> 16) & 0xff);
-	buf[2] = ((dev->t10_pr.pr_generation >> 8) & 0xff);
-	buf[3] = (dev->t10_pr.pr_generation & 0xff);
+	put_unaligned_be32(dev->t10_pr.pr_generation, &buf[0]);
 
 	spin_lock(&dev->dev_reservation_lock);
 	if (dev->dev_pr_res_holder) {
@@ -3992,14 +3945,8 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 		/*
 		 * Set RESERVATION KEY
 		 */
-		buf[off++] = ((pr_reg->pr_res_key >> 56) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 48) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 40) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 32) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 24) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 16) & 0xff);
-		buf[off++] = ((pr_reg->pr_res_key >> 8) & 0xff);
-		buf[off++] = (pr_reg->pr_res_key & 0xff);
+		put_unaligned_be64(pr_reg->pr_res_key, &buf[off]);
+		off += 8;
 		off += 4; /* Skip Over Reserved area */
 
 		/*
@@ -4041,8 +3988,8 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 		if (!pr_reg->pr_reg_all_tg_pt) {
 			u16 sep_rtpi = pr_reg->tg_pt_sep_rtpi;
 
-			buf[off++] = ((sep_rtpi >> 8) & 0xff);
-			buf[off++] = (sep_rtpi & 0xff);
+			put_unaligned_be16(sep_rtpi, &buf[off]);
+			off += 2;
 		} else
 			off += 2; /* Skip over RELATIVE TARGET PORT IDENTIFIER */
 
@@ -4062,10 +4009,8 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 		/*
 		 * Set the ADDITIONAL DESCRIPTOR LENGTH
 		 */
-		buf[off++] = ((desc_len >> 24) & 0xff);
-		buf[off++] = ((desc_len >> 16) & 0xff);
-		buf[off++] = ((desc_len >> 8) & 0xff);
-		buf[off++] = (desc_len & 0xff);
+		put_unaligned_be32(desc_len, &buf[off]);
+		off += 4; /* Skip past the length field just written */
 		/*
 		 * Size of full desctipor header minus TransportID
 		 * containing $FABRIC_MOD specific) initiator device/port
@@ -4082,10 +4027,7 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 	/*
 	 * Set ADDITIONAL_LENGTH
 	 */
-	buf[4] = ((add_len >> 24) & 0xff);
-	buf[5] = ((add_len >> 16) & 0xff);
-	buf[6] = ((add_len >> 8) & 0xff);
-	buf[7] = (add_len & 0xff);
+	put_unaligned_be32(add_len, &buf[4]);
 
 	transport_kunmap_data_sg(cmd);
 

diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index ceec021..7c69b4a 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c

@@ -168,7 +168,7 @@ static void pscsi_tape_read_blocksize(struct se_device *dev,
 	/*
 	 * If MODE_SENSE still returns zero, set the default value to 1024.
 	 */
-	sdev->sector_size = (buf[9] << 16) | (buf[10] << 8) | (buf[11]);
+	sdev->sector_size = get_unaligned_be24(&buf[9]);
 out_free:
 	if (!sdev->sector_size)
 		sdev->sector_size = 1024;
@@ -209,8 +209,7 @@ pscsi_get_inquiry_vpd_serial(struct scsi_device *sdev, struct t10_wwn *wwn)
 	cdb[0] = INQUIRY;
 	cdb[1] = 0x01; /* Query VPD */
 	cdb[2] = 0x80; /* Unit Serial Number */
-	cdb[3] = (INQUIRY_VPD_SERIAL_LEN >> 8) & 0xff;
-	cdb[4] = (INQUIRY_VPD_SERIAL_LEN & 0xff);
+	put_unaligned_be16(INQUIRY_VPD_SERIAL_LEN, &cdb[3]);
 
 	ret = scsi_execute_req(sdev, cdb, DMA_FROM_DEVICE, buf,
 			      INQUIRY_VPD_SERIAL_LEN, NULL, HZ, 1, NULL);
@@ -245,8 +244,7 @@ pscsi_get_inquiry_vpd_device_ident(struct scsi_device *sdev,
 	cdb[0] = INQUIRY;
 	cdb[1] = 0x01; /* Query VPD */
 	cdb[2] = 0x83; /* Device Identifier */
-	cdb[3] = (INQUIRY_VPD_DEVICE_IDENTIFIER_LEN >> 8) & 0xff;
-	cdb[4] = (INQUIRY_VPD_DEVICE_IDENTIFIER_LEN & 0xff);
+	put_unaligned_be16(INQUIRY_VPD_DEVICE_IDENTIFIER_LEN, &cdb[3]);
 
 	ret = scsi_execute_req(sdev, cdb, DMA_FROM_DEVICE, buf,
 			      INQUIRY_VPD_DEVICE_IDENTIFIER_LEN,
@@ -254,7 +252,7 @@ pscsi_get_inquiry_vpd_device_ident(struct scsi_device *sdev,
 	if (ret)
 		goto out;
 
-	page_len = (buf[2] << 8) | buf[3];
+	page_len = get_unaligned_be16(&buf[2]);
 	while (page_len > 0) {
 		/* Grab a pointer to the Identification descriptor */
 		page_83 = &buf[off];
@@ -384,7 +382,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd)
 	spin_unlock_irq(sh->host_lock);
 	/*
 	 * Claim exclusive struct block_device access to struct scsi_device
-	 * for TYPE_DISK using supplied udev_path
+	 * for TYPE_DISK and TYPE_ZBC using supplied udev_path
 	 */
 	bd = blkdev_get_by_path(dev->udev_path,
 				FMODE_WRITE|FMODE_READ|FMODE_EXCL, pdv);
@@ -402,8 +400,9 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd)
 		return ret;
 	}
 
-	pr_debug("CORE_PSCSI[%d] - Added TYPE_DISK for %d:%d:%d:%llu\n",
-		phv->phv_host_id, sh->host_no, sd->channel, sd->id, sd->lun);
+	pr_debug("CORE_PSCSI[%d] - Added TYPE_%s for %d:%d:%d:%llu\n",
+		phv->phv_host_id, sd->type == TYPE_DISK ? "DISK" : "ZBC",
+		sh->host_no, sd->channel, sd->id, sd->lun);
 	return 0;
 }
 
@@ -522,6 +521,7 @@ static int pscsi_configure_device(struct se_device *dev)
 		 */
 		switch (sd->type) {
 		case TYPE_DISK:
+		case TYPE_ZBC:
 			ret = pscsi_create_type_disk(dev, sd);
 			break;
 		default:
@@ -566,6 +566,11 @@ static void pscsi_dev_call_rcu(struct rcu_head *p)
 
 static void pscsi_free_device(struct se_device *dev)
 {
+	call_rcu(&dev->rcu_head, pscsi_dev_call_rcu);
+}
+
+static void pscsi_destroy_device(struct se_device *dev)
+{
 	struct pscsi_dev_virt *pdv = PSCSI_DEV(dev);
 	struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr;
 	struct scsi_device *sd = pdv->pdv_sd;
@@ -573,9 +578,11 @@ static void pscsi_free_device(struct se_device *dev)
 	if (sd) {
 		/*
 		 * Release exclusive pSCSI internal struct block_device claim for
-		 * struct scsi_device with TYPE_DISK from pscsi_create_type_disk()
+		 * struct scsi_device with TYPE_DISK or TYPE_ZBC
+		 * from pscsi_create_type_disk()
 		 */
-		if ((sd->type == TYPE_DISK) && pdv->pdv_bd) {
+		if ((sd->type == TYPE_DISK || sd->type == TYPE_ZBC) &&
+		    pdv->pdv_bd) {
 			blkdev_put(pdv->pdv_bd,
 				   FMODE_WRITE|FMODE_READ|FMODE_EXCL);
 			pdv->pdv_bd = NULL;
@@ -594,15 +601,13 @@ static void pscsi_free_device(struct se_device *dev)
 
 		pdv->pdv_sd = NULL;
 	}
-	call_rcu(&dev->rcu_head, pscsi_dev_call_rcu);
 }
 
-static void pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg,
-				     unsigned char *sense_buffer)
+static void pscsi_complete_cmd(struct se_cmd *cmd, u8 scsi_status,
+			       unsigned char *req_sense)
 {
 	struct pscsi_dev_virt *pdv = PSCSI_DEV(cmd->se_dev);
 	struct scsi_device *sd = pdv->pdv_sd;
-	int result;
 	struct pscsi_plugin_task *pt = cmd->priv;
 	unsigned char *cdb;
 	/*
@@ -613,7 +618,6 @@ static void pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg,
 		return;
 
 	cdb = &pt->pscsi_cdb[0];
-	result = pt->pscsi_result;
 	/*
 	 * Hack to make sure that Write-Protect modepage is set if R/O mode is
 	 * forced.
@@ -622,7 +626,7 @@ static void pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg,
 		goto after_mode_sense;
 
 	if (((cdb[0] == MODE_SENSE) || (cdb[0] == MODE_SENSE_10)) &&
-	     (status_byte(result) << 1) == SAM_STAT_GOOD) {
+	    scsi_status == SAM_STAT_GOOD) {
 		bool read_only = target_lun_is_rdonly(cmd);
 
 		if (read_only) {
@@ -657,40 +661,36 @@ static void pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg,
 	 * storage engine.
 	 */
 	if (((cdb[0] == MODE_SELECT) || (cdb[0] == MODE_SELECT_10)) &&
-	      (status_byte(result) << 1) == SAM_STAT_GOOD) {
+	     scsi_status == SAM_STAT_GOOD) {
 		unsigned char *buf;
 		u16 bdl;
 		u32 blocksize;
 
-		buf = sg_virt(&sg[0]);
+		buf = sg_virt(&cmd->t_data_sg[0]);
 		if (!buf) {
 			pr_err("Unable to get buf for scatterlist\n");
 			goto after_mode_select;
 		}
 
 		if (cdb[0] == MODE_SELECT)
-			bdl = (buf[3]);
+			bdl = buf[3];
 		else
-			bdl = (buf[6] << 8) | (buf[7]);
+			bdl = get_unaligned_be16(&buf[6]);
 
 		if (!bdl)
 			goto after_mode_select;
 
 		if (cdb[0] == MODE_SELECT)
-			blocksize = (buf[9] << 16) | (buf[10] << 8) |
-					(buf[11]);
+			blocksize = get_unaligned_be24(&buf[9]);
 		else
-			blocksize = (buf[13] << 16) | (buf[14] << 8) |
-					(buf[15]);
+			blocksize = get_unaligned_be24(&buf[13]);
 
 		sd->sector_size = blocksize;
 	}
 after_mode_select:
 
-	if (sense_buffer && (status_byte(result) & CHECK_CONDITION)) {
-		memcpy(sense_buffer, pt->pscsi_sense, TRANSPORT_SENSE_BUFFER);
-		cmd->se_cmd_flags |= SCF_TRANSPORT_TASK_SENSE;
-	}
+	if (scsi_status == SAM_STAT_CHECK_CONDITION)
+		transport_copy_sense_to_cmd(cmd, req_sense);
 }
 
 enum {
@@ -1002,7 +1002,8 @@ pscsi_execute_cmd(struct se_cmd *cmd)
 	req->end_io_data = cmd;
 	scsi_req(req)->cmd_len = scsi_command_size(pt->pscsi_cdb);
 	scsi_req(req)->cmd = &pt->pscsi_cdb[0];
-	if (pdv->pdv_sd->type == TYPE_DISK)
+	if (pdv->pdv_sd->type == TYPE_DISK ||
+	    pdv->pdv_sd->type == TYPE_ZBC)
 		req->timeout = PS_TIMEOUT_DISK;
 	else
 		req->timeout = PS_TIMEOUT_OTHER;
@@ -1047,30 +1048,29 @@ static void pscsi_req_done(struct request *req, blk_status_t status)
 {
 	struct se_cmd *cmd = req->end_io_data;
 	struct pscsi_plugin_task *pt = cmd->priv;
+	int result = scsi_req(req)->result;
+	u8 scsi_status = status_byte(result) << 1;
 
-	pt->pscsi_result = scsi_req(req)->result;
-	pt->pscsi_resid = scsi_req(req)->resid_len;
-
-	cmd->scsi_status = status_byte(pt->pscsi_result) << 1;
-	if (cmd->scsi_status) {
+	if (scsi_status) {
 		pr_debug("PSCSI Status Byte exception at cmd: %p CDB:"
 			" 0x%02x Result: 0x%08x\n", cmd, pt->pscsi_cdb[0],
-			pt->pscsi_result);
+			result);
 	}
 
-	switch (host_byte(pt->pscsi_result)) {
+	pscsi_complete_cmd(cmd, scsi_status, scsi_req(req)->sense);
+
+	switch (host_byte(result)) {
 	case DID_OK:
-		target_complete_cmd(cmd, cmd->scsi_status);
+		target_complete_cmd(cmd, scsi_status);
 		break;
 	default:
 		pr_debug("PSCSI Host Byte exception at cmd: %p CDB:"
 			" 0x%02x Result: 0x%08x\n", cmd, pt->pscsi_cdb[0],
-			pt->pscsi_result);
+			result);
 		target_complete_cmd(cmd, SAM_STAT_CHECK_CONDITION);
 		break;
 	}
 
-	memcpy(pt->pscsi_sense, scsi_req(req)->sense, TRANSPORT_SENSE_BUFFER);
 	__blk_put_request(req->q, req);
 	kfree(pt);
 }
@@ -1086,8 +1086,8 @@ static const struct target_backend_ops pscsi_ops = {
 	.pmode_enable_hba	= pscsi_pmode_enable_hba,
 	.alloc_device		= pscsi_alloc_device,
 	.configure_device	= pscsi_configure_device,
+	.destroy_device		= pscsi_destroy_device,
 	.free_device		= pscsi_free_device,
-	.transport_complete	= pscsi_transport_complete,
 	.parse_cdb		= pscsi_parse_cdb,
 	.set_configfs_dev_params = pscsi_set_configfs_dev_params,
 	.show_configfs_dev_params = pscsi_show_configfs_dev_params,

diff --git a/drivers/target/target_core_pscsi.h b/drivers/target/target_core_pscsi.h
index 8a02fa4..b86fb0e 100644
--- a/drivers/target/target_core_pscsi.h
+++ b/drivers/target/target_core_pscsi.h

@@ -23,10 +23,6 @@ struct scsi_device;
 struct Scsi_Host;
 
 struct pscsi_plugin_task {
-	unsigned char pscsi_sense[TRANSPORT_SENSE_BUFFER];
-	int	pscsi_direction;
-	int	pscsi_result;
-	u32	pscsi_resid;
 	unsigned char pscsi_cdb[0];
 } ____cacheline_aligned;
 

diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c
index 20253d0..a6e8106 100644
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c

@@ -339,10 +339,14 @@ static void rd_dev_call_rcu(struct rcu_head *p)
 
 static void rd_free_device(struct se_device *dev)
 {
+	call_rcu(&dev->rcu_head, rd_dev_call_rcu);
+}
+
+static void rd_destroy_device(struct se_device *dev)
+{
 	struct rd_dev *rd_dev = RD_DEV(dev);
 
 	rd_release_device_space(rd_dev);
-	call_rcu(&dev->rcu_head, rd_dev_call_rcu);
 }
 
 static struct rd_dev_sg_table *rd_get_sg_table(struct rd_dev *rd_dev, u32 page)
@@ -554,7 +558,7 @@ static ssize_t rd_set_configfs_dev_params(struct se_device *dev,
 	struct rd_dev *rd_dev = RD_DEV(dev);
 	char *orig, *ptr, *opts;
 	substring_t args[MAX_OPT_ARGS];
-	int ret = 0, arg, token;
+	int arg, token;
 
 	opts = kstrdup(page, GFP_KERNEL);
 	if (!opts)
@@ -589,7 +593,7 @@ static ssize_t rd_set_configfs_dev_params(struct se_device *dev,
 	}
 
 	kfree(orig);
-	return (!ret) ? count : ret;
+	return count;
 }
 
 static ssize_t rd_show_configfs_dev_params(struct se_device *dev, char *b)
@@ -651,6 +655,7 @@ static const struct target_backend_ops rd_mcp_ops = {
 	.detach_hba		= rd_detach_hba,
 	.alloc_device		= rd_alloc_device,
 	.configure_device	= rd_configure_device,
+	.destroy_device		= rd_destroy_device,
 	.free_device		= rd_free_device,
 	.parse_cdb		= rd_parse_cdb,
 	.set_configfs_dev_params = rd_set_configfs_dev_params,

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 4316f7b..750a04e 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c

@@ -71,14 +71,8 @@ sbc_emulate_readcapacity(struct se_cmd *cmd)
 	else
 		blocks = (u32)blocks_long;
 
-	buf[0] = (blocks >> 24) & 0xff;
-	buf[1] = (blocks >> 16) & 0xff;
-	buf[2] = (blocks >> 8) & 0xff;
-	buf[3] = blocks & 0xff;
-	buf[4] = (dev->dev_attrib.block_size >> 24) & 0xff;
-	buf[5] = (dev->dev_attrib.block_size >> 16) & 0xff;
-	buf[6] = (dev->dev_attrib.block_size >> 8) & 0xff;
-	buf[7] = dev->dev_attrib.block_size & 0xff;
+	put_unaligned_be32(blocks, &buf[0]);
+	put_unaligned_be32(dev->dev_attrib.block_size, &buf[4]);
 
 	rbuf = transport_kmap_data_sg(cmd);
 	if (rbuf) {
@@ -102,18 +96,8 @@ sbc_emulate_readcapacity_16(struct se_cmd *cmd)
 	unsigned long long blocks = dev->transport->get_blocks(dev);
 
 	memset(buf, 0, sizeof(buf));
-	buf[0] = (blocks >> 56) & 0xff;
-	buf[1] = (blocks >> 48) & 0xff;
-	buf[2] = (blocks >> 40) & 0xff;
-	buf[3] = (blocks >> 32) & 0xff;
-	buf[4] = (blocks >> 24) & 0xff;
-	buf[5] = (blocks >> 16) & 0xff;
-	buf[6] = (blocks >> 8) & 0xff;
-	buf[7] = blocks & 0xff;
-	buf[8] = (dev->dev_attrib.block_size >> 24) & 0xff;
-	buf[9] = (dev->dev_attrib.block_size >> 16) & 0xff;
-	buf[10] = (dev->dev_attrib.block_size >> 8) & 0xff;
-	buf[11] = dev->dev_attrib.block_size & 0xff;
+	put_unaligned_be64(blocks, &buf[0]);
+	put_unaligned_be32(dev->dev_attrib.block_size, &buf[8]);
 	/*
 	 * Set P_TYPE and PROT_EN bits for DIF support
 	 */
@@ -134,8 +118,8 @@ sbc_emulate_readcapacity_16(struct se_cmd *cmd)
 
 	if (dev->transport->get_alignment_offset_lbas) {
 		u16 lalba = dev->transport->get_alignment_offset_lbas(dev);
-		buf[14] = (lalba >> 8) & 0x3f;
-		buf[15] = lalba & 0xff;
+
+		put_unaligned_be16(lalba, &buf[14]);
 	}
 
 	/*
@@ -262,18 +246,17 @@ static inline u32 transport_get_sectors_6(unsigned char *cdb)
 
 static inline u32 transport_get_sectors_10(unsigned char *cdb)
 {
-	return (u32)(cdb[7] << 8) + cdb[8];
+	return get_unaligned_be16(&cdb[7]);
 }
 
 static inline u32 transport_get_sectors_12(unsigned char *cdb)
 {
-	return (u32)(cdb[6] << 24) + (cdb[7] << 16) + (cdb[8] << 8) + cdb[9];
+	return get_unaligned_be32(&cdb[6]);
 }
 
 static inline u32 transport_get_sectors_16(unsigned char *cdb)
 {
-	return (u32)(cdb[10] << 24) + (cdb[11] << 16) +
-		    (cdb[12] << 8) + cdb[13];
+	return get_unaligned_be32(&cdb[10]);
 }
 
 /*
@@ -281,29 +264,23 @@ static inline u32 transport_get_sectors_16(unsigned char *cdb)
  */
 static inline u32 transport_get_sectors_32(unsigned char *cdb)
 {
-	return (u32)(cdb[28] << 24) + (cdb[29] << 16) +
-		    (cdb[30] << 8) + cdb[31];
+	return get_unaligned_be32(&cdb[28]);
 
 }
 
 static inline u32 transport_lba_21(unsigned char *cdb)
 {
-	return ((cdb[1] & 0x1f) << 16) | (cdb[2] << 8) | cdb[3];
+	return get_unaligned_be24(&cdb[1]) & 0x1fffff;
 }
 
 static inline u32 transport_lba_32(unsigned char *cdb)
 {
-	return (cdb[2] << 24) | (cdb[3] << 16) | (cdb[4] << 8) | cdb[5];
+	return get_unaligned_be32(&cdb[2]);
 }
 
 static inline unsigned long long transport_lba_64(unsigned char *cdb)
 {
-	unsigned int __v1, __v2;
-
-	__v1 = (cdb[2] << 24) | (cdb[3] << 16) | (cdb[4] << 8) | cdb[5];
-	__v2 = (cdb[6] << 24) | (cdb[7] << 16) | (cdb[8] << 8) | cdb[9];
-
-	return ((unsigned long long)__v2) | (unsigned long long)__v1 << 32;
+	return get_unaligned_be64(&cdb[2]);
 }
 
 /*
@@ -311,12 +288,7 @@ static inline unsigned long long transport_lba_64(unsigned char *cdb)
  */
 static inline unsigned long long transport_lba_64_ext(unsigned char *cdb)
 {
-	unsigned int __v1, __v2;
-
-	__v1 = (cdb[12] << 24) | (cdb[13] << 16) | (cdb[14] << 8) | cdb[15];
-	__v2 = (cdb[16] << 24) | (cdb[17] << 16) | (cdb[18] << 8) | cdb[19];
-
-	return ((unsigned long long)__v2) | (unsigned long long)__v1 << 32;
+	return get_unaligned_be64(&cdb[12]);
 }
 
 static sense_reason_t
@@ -1005,6 +977,12 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 		break;
 	}
 	case COMPARE_AND_WRITE:
+		if (!dev->dev_attrib.emulate_caw) {
+			pr_err_ratelimited("se_device %s/%s (vpd_unit_serial %s) reject"
+				" COMPARE_AND_WRITE\n", dev->se_hba->backend->ops->name,
+				dev->dev_group.cg_item.ci_name, dev->t10_wwn.unit_serial);
+			return TCM_UNSUPPORTED_SCSI_OPCODE;
+		}
 		sectors = cdb[13];
 		/*
 		 * Currently enforce COMPARE_AND_WRITE for a single sector
@@ -1045,8 +1023,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
 				cmd->t_task_cdb[1] & 0x1f);
 			return TCM_INVALID_CDB_FIELD;
 		}
-		size = (cdb[10] << 24) | (cdb[11] << 16) |
-		       (cdb[12] << 8) | cdb[13];
+		size = get_unaligned_be32(&cdb[10]);
 		break;
 	case SYNCHRONIZE_CACHE:
 	case SYNCHRONIZE_CACHE_16:
@@ -1450,7 +1427,7 @@ sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors,
 				 (unsigned long long)sector, sdt->guard_tag,
 				 sdt->app_tag, be32_to_cpu(sdt->ref_tag));
 
-			if (sdt->app_tag == cpu_to_be16(0xffff)) {
+			if (sdt->app_tag == T10_PI_APP_ESCAPE) {
 				dsg_off += block_size;
 				goto next;
 			}

diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index 2a91ed3..cb0461a1 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c

@@ -287,8 +287,8 @@ spc_emulate_evpd_83(struct se_cmd *cmd, unsigned char *buf)
 		/* Skip over Obsolete field in RTPI payload
 		 * in Table 472 */
 		off += 2;
-		buf[off++] = ((lun->lun_rtpi >> 8) & 0xff);
-		buf[off++] = (lun->lun_rtpi & 0xff);
+		put_unaligned_be16(lun->lun_rtpi, &buf[off]);
+		off += 2;
 		len += 8; /* Header size + Designation descriptor */
 		/*
 		 * Target port group identifier, see spc4r17
@@ -316,8 +316,8 @@ spc_emulate_evpd_83(struct se_cmd *cmd, unsigned char *buf)
 		off++; /* Skip over Reserved */
 		buf[off++] = 4; /* DESIGNATOR LENGTH */
 		off += 2; /* Skip over Reserved Field */
-		buf[off++] = ((tg_pt_gp_id >> 8) & 0xff);
-		buf[off++] = (tg_pt_gp_id & 0xff);
+		put_unaligned_be16(tg_pt_gp_id, &buf[off]);
+		off += 2;
 		len += 8; /* Header size + Designation descriptor */
 		/*
 		 * Logical Unit Group identifier, see spc4r17
@@ -343,8 +343,8 @@ spc_emulate_evpd_83(struct se_cmd *cmd, unsigned char *buf)
 		off++; /* Skip over Reserved */
 		buf[off++] = 4; /* DESIGNATOR LENGTH */
 		off += 2; /* Skip over Reserved Field */
-		buf[off++] = ((lu_gp_id >> 8) & 0xff);
-		buf[off++] = (lu_gp_id & 0xff);
+		put_unaligned_be16(lu_gp_id, &buf[off]);
+		off += 2;
 		len += 8; /* Header size + Designation descriptor */
 		/*
 		 * SCSI name string designator, see spc4r17
@@ -431,8 +431,7 @@ spc_emulate_evpd_83(struct se_cmd *cmd, unsigned char *buf)
 		/* Header size + Designation descriptor */
 		len += (scsi_target_len + 4);
 	}
-	buf[2] = ((len >> 8) & 0xff);
-	buf[3] = (len & 0xff); /* Page Length for VPD 0x83 */
+	put_unaligned_be16(len, &buf[2]); /* Page Length for VPD 0x83 */
 	return 0;
 }
 EXPORT_SYMBOL(spc_emulate_evpd_83);
@@ -1288,7 +1287,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 		cmd->execute_cmd = spc_emulate_modeselect;
 		break;
 	case MODE_SELECT_10:
-		*size = (cdb[7] << 8) + cdb[8];
+		*size = get_unaligned_be16(&cdb[7]);
 		cmd->execute_cmd = spc_emulate_modeselect;
 		break;
 	case MODE_SENSE:
@@ -1296,25 +1295,25 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 		cmd->execute_cmd = spc_emulate_modesense;
 		break;
 	case MODE_SENSE_10:
-		*size = (cdb[7] << 8) + cdb[8];
+		*size = get_unaligned_be16(&cdb[7]);
 		cmd->execute_cmd = spc_emulate_modesense;
 		break;
 	case LOG_SELECT:
 	case LOG_SENSE:
-		*size = (cdb[7] << 8) + cdb[8];
+		*size = get_unaligned_be16(&cdb[7]);
 		break;
 	case PERSISTENT_RESERVE_IN:
-		*size = (cdb[7] << 8) + cdb[8];
+		*size = get_unaligned_be16(&cdb[7]);
 		cmd->execute_cmd = target_scsi3_emulate_pr_in;
 		break;
 	case PERSISTENT_RESERVE_OUT:
-		*size = (cdb[7] << 8) + cdb[8];
+		*size = get_unaligned_be32(&cdb[5]);
 		cmd->execute_cmd = target_scsi3_emulate_pr_out;
 		break;
 	case RELEASE:
 	case RELEASE_10:
 		if (cdb[0] == RELEASE_10)
-			*size = (cdb[7] << 8) | cdb[8];
+			*size = get_unaligned_be16(&cdb[7]);
 		else
 			*size = cmd->data_length;
 
@@ -1327,7 +1326,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 		 * Assume the passthrough or $FABRIC_MOD will tell us about it.
 		 */
 		if (cdb[0] == RESERVE_10)
-			*size = (cdb[7] << 8) | cdb[8];
+			*size = get_unaligned_be16(&cdb[7]);
 		else
 			*size = cmd->data_length;
 
@@ -1338,7 +1337,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 		cmd->execute_cmd = spc_emulate_request_sense;
 		break;
 	case INQUIRY:
-		*size = (cdb[3] << 8) + cdb[4];
+		*size = get_unaligned_be16(&cdb[3]);
 
 		/*
 		 * Do implicit HEAD_OF_QUEUE processing for INQUIRY.
@@ -1349,7 +1348,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 		break;
 	case SECURITY_PROTOCOL_IN:
 	case SECURITY_PROTOCOL_OUT:
-		*size = (cdb[6] << 24) | (cdb[7] << 16) | (cdb[8] << 8) | cdb[9];
+		*size = get_unaligned_be32(&cdb[6]);
 		break;
 	case EXTENDED_COPY:
 		*size = get_unaligned_be32(&cdb[10]);
@@ -1361,19 +1360,18 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 		break;
 	case READ_ATTRIBUTE:
 	case WRITE_ATTRIBUTE:
-		*size = (cdb[10] << 24) | (cdb[11] << 16) |
-		       (cdb[12] << 8) | cdb[13];
+		*size = get_unaligned_be32(&cdb[10]);
 		break;
 	case RECEIVE_DIAGNOSTIC:
 	case SEND_DIAGNOSTIC:
-		*size = (cdb[3] << 8) | cdb[4];
+		*size = get_unaligned_be16(&cdb[3]);
 		break;
 	case WRITE_BUFFER:
-		*size = (cdb[6] << 16) + (cdb[7] << 8) + cdb[8];
+		*size = get_unaligned_be24(&cdb[6]);
 		break;
 	case REPORT_LUNS:
 		cmd->execute_cmd = spc_emulate_report_luns;
-		*size = (cdb[6] << 24) | (cdb[7] << 16) | (cdb[8] << 8) | cdb[9];
+		*size = get_unaligned_be32(&cdb[6]);
 		/*
 		 * Do implicit HEAD_OF_QUEUE processing for REPORT_LUNS
 		 * See spc4r17 section 5.3

diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 13f47bf..e22847b 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c

@@ -355,20 +355,10 @@ static void core_tmr_drain_state_list(
 		cmd = list_entry(drain_task_list.next, struct se_cmd, state_list);
 		list_del_init(&cmd->state_list);
 
-		pr_debug("LUN_RESET: %s cmd: %p"
-			" ITT/CmdSN: 0x%08llx/0x%08x, i_state: %d, t_state: %d"
-			"cdb: 0x%02x\n",
-			(preempt_and_abort_list) ? "Preempt" : "", cmd,
-			cmd->tag, 0,
-			cmd->se_tfo->get_cmd_state(cmd), cmd->t_state,
-			cmd->t_task_cdb[0]);
-		pr_debug("LUN_RESET: ITT[0x%08llx] - pr_res_key: 0x%016Lx"
-			" -- CMD_T_ACTIVE: %d"
-			" CMD_T_STOP: %d CMD_T_SENT: %d\n",
-			cmd->tag, cmd->pr_res_key,
-			(cmd->transport_state & CMD_T_ACTIVE) != 0,
-			(cmd->transport_state & CMD_T_STOP) != 0,
-			(cmd->transport_state & CMD_T_SENT) != 0);
+		target_show_cmd("LUN_RESET: ", cmd);
+		pr_debug("LUN_RESET: ITT[0x%08llx] - %s pr_res_key: 0x%016Lx\n",
+			 cmd->tag, (preempt_and_abort_list) ? "preempt" : "",
+			 cmd->pr_res_key);
 
 		/*
 		 * If the command may be queued onto a workqueue cancel it now.

diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index 310d9e5..3691373 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c

@@ -576,7 +576,6 @@ struct se_lun *core_tpg_alloc_lun(
 		return ERR_PTR(-ENOMEM);
 	}
 	lun->unpacked_lun = unpacked_lun;
-	lun->lun_link_magic = SE_LUN_LINK_MAGIC;
 	atomic_set(&lun->lun_acl_count, 0);
 	init_completion(&lun->lun_ref_comp);
 	init_completion(&lun->lun_shutdown_comp);

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index f1b3a46..97fed9a 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c

@@ -252,7 +252,7 @@ int transport_alloc_session_tags(struct se_session *se_sess,
 	int rc;
 
 	se_sess->sess_cmd_map = kzalloc(tag_num * tag_size,
-					GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+					GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL);
 	if (!se_sess->sess_cmd_map) {
 		se_sess->sess_cmd_map = vzalloc(tag_num * tag_size);
 		if (!se_sess->sess_cmd_map) {
@@ -704,23 +704,43 @@ static unsigned char *transport_get_sense_buffer(struct se_cmd *cmd)
 	return cmd->sense_buffer;
 }
 
+void transport_copy_sense_to_cmd(struct se_cmd *cmd, unsigned char *sense)
+{
+	unsigned char *cmd_sense_buf;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cmd->t_state_lock, flags);
+	cmd_sense_buf = transport_get_sense_buffer(cmd);
+	if (!cmd_sense_buf) {
+		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+		return;
+	}
+
+	cmd->se_cmd_flags |= SCF_TRANSPORT_TASK_SENSE;
+	memcpy(cmd_sense_buf, sense, cmd->scsi_sense_length);
+	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+}
+EXPORT_SYMBOL(transport_copy_sense_to_cmd);
+
 void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
 {
 	struct se_device *dev = cmd->se_dev;
-	int success = scsi_status == GOOD;
+	int success;
 	unsigned long flags;
 
 	cmd->scsi_status = scsi_status;
 
-
 	spin_lock_irqsave(&cmd->t_state_lock, flags);
-
-	if (dev && dev->transport->transport_complete) {
-		dev->transport->transport_complete(cmd,
-				cmd->t_data_sg,
-				transport_get_sense_buffer(cmd));
+	switch (cmd->scsi_status) {
+	case SAM_STAT_CHECK_CONDITION:
 		if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)
 			success = 1;
+		else
+			success = 0;
+		break;
+	default:
+		success = 1;
+		break;
 	}
 
 	/*
@@ -730,6 +750,15 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
 	if (cmd->transport_state & CMD_T_ABORTED ||
 	    cmd->transport_state & CMD_T_STOP) {
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+		/*
+		 * If COMPARE_AND_WRITE was stopped by __transport_wait_for_tasks(),
+		 * release se_device->caw_sem obtained by sbc_compare_and_write()
+		 * since target_complete_ok_work() or target_complete_failure_work()
+		 * won't be called to invoke the normal CAW completion callbacks.
+		 */
+		if (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) {
+			up(&dev->caw_sem);
+		}
 		complete_all(&cmd->t_transport_stop_comp);
 		return;
 	} else if (!success) {
@@ -1239,6 +1268,7 @@ void transport_init_se_cmd(
 	init_completion(&cmd->t_transport_stop_comp);
 	init_completion(&cmd->cmd_wait_comp);
 	spin_lock_init(&cmd->t_state_lock);
+	INIT_WORK(&cmd->work, NULL);
 	kref_init(&cmd->cmd_kref);
 
 	cmd->se_tfo = tfo;
@@ -1590,9 +1620,33 @@ static void target_complete_tmr_failure(struct work_struct *work)
 	se_cmd->se_tmr_req->response = TMR_LUN_DOES_NOT_EXIST;
 	se_cmd->se_tfo->queue_tm_rsp(se_cmd);
 
+	transport_lun_remove_cmd(se_cmd);
 	transport_cmd_check_stop_to_fabric(se_cmd);
 }
 
+static bool target_lookup_lun_from_tag(struct se_session *se_sess, u64 tag,
+				       u64 *unpacked_lun)
+{
+	struct se_cmd *se_cmd;
+	unsigned long flags;
+	bool ret = false;
+
+	spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
+	list_for_each_entry(se_cmd, &se_sess->sess_cmd_list, se_cmd_list) {
+		if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
+			continue;
+
+		if (se_cmd->tag == tag) {
+			*unpacked_lun = se_cmd->orig_fe_lun;
+			ret = true;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+
+	return ret;
+}
+
 /**
  * target_submit_tmr - lookup unpacked lun and submit uninitialized se_cmd
  *                     for TMR CDBs
@@ -1640,19 +1694,31 @@ int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess,
 		core_tmr_release_req(se_cmd->se_tmr_req);
 		return ret;
 	}
+	/*
+	 * If this is ABORT_TASK with no explicit fabric provided LUN,
+	 * go ahead and search active session tags for a match to figure
+	 * out unpacked_lun for the original se_cmd.
+	 */
+	if (tm_type == TMR_ABORT_TASK && (flags & TARGET_SCF_LOOKUP_LUN_FROM_TAG)) {
+		if (!target_lookup_lun_from_tag(se_sess, tag, &unpacked_lun))
+			goto failure;
+	}
 
 	ret = transport_lookup_tmr_lun(se_cmd, unpacked_lun);
-	if (ret) {
-		/*
-		 * For callback during failure handling, push this work off
-		 * to process context with TMR_LUN_DOES_NOT_EXIST status.
-		 */
-		INIT_WORK(&se_cmd->work, target_complete_tmr_failure);
-		schedule_work(&se_cmd->work);
-		return 0;
-	}
+	if (ret)
+		goto failure;
+
 	transport_generic_handle_tmr(se_cmd);
 	return 0;
+
+	/*
+	 * For callback during failure handling, push this work off
+	 * to process context with TMR_LUN_DOES_NOT_EXIST status.
+	 */
+failure:
+	INIT_WORK(&se_cmd->work, target_complete_tmr_failure);
+	schedule_work(&se_cmd->work);
+	return 0;
 }
 EXPORT_SYMBOL(target_submit_tmr);
 
@@ -1667,15 +1733,9 @@ void transport_generic_request_failure(struct se_cmd *cmd,
 	if (transport_check_aborted_status(cmd, 1))
 		return;
 
-	pr_debug("-----[ Storage Engine Exception for cmd: %p ITT: 0x%08llx"
-		" CDB: 0x%02x\n", cmd, cmd->tag, cmd->t_task_cdb[0]);
-	pr_debug("-----[ i_state: %d t_state: %d sense_reason: %d\n",
-		cmd->se_tfo->get_cmd_state(cmd),
-		cmd->t_state, sense_reason);
-	pr_debug("-----[ CMD_T_ACTIVE: %d CMD_T_STOP: %d CMD_T_SENT: %d\n",
-		(cmd->transport_state & CMD_T_ACTIVE) != 0,
-		(cmd->transport_state & CMD_T_STOP) != 0,
-		(cmd->transport_state & CMD_T_SENT) != 0);
+	pr_debug("-----[ Storage Engine Exception; sense_reason %d\n",
+		 sense_reason);
+	target_show_cmd("-----[ ", cmd);
 
 	/*
 	 * For SAM Task Attribute emulation for failed struct se_cmd
@@ -2668,6 +2728,108 @@ int target_put_sess_cmd(struct se_cmd *se_cmd)
 }
 EXPORT_SYMBOL(target_put_sess_cmd);
 
+static const char *data_dir_name(enum dma_data_direction d)
+{
+	switch (d) {
+	case DMA_BIDIRECTIONAL:	return "BIDI";
+	case DMA_TO_DEVICE:	return "WRITE";
+	case DMA_FROM_DEVICE:	return "READ";
+	case DMA_NONE:		return "NONE";
+	}
+
+	return "(?)";
+}
+
+static const char *cmd_state_name(enum transport_state_table t)
+{
+	switch (t) {
+	case TRANSPORT_NO_STATE:	return "NO_STATE";
+	case TRANSPORT_NEW_CMD:		return "NEW_CMD";
+	case TRANSPORT_WRITE_PENDING:	return "WRITE_PENDING";
+	case TRANSPORT_PROCESSING:	return "PROCESSING";
+	case TRANSPORT_COMPLETE:	return "COMPLETE";
+	case TRANSPORT_ISTATE_PROCESSING:
+					return "ISTATE_PROCESSING";
+	case TRANSPORT_COMPLETE_QF_WP:	return "COMPLETE_QF_WP";
+	case TRANSPORT_COMPLETE_QF_OK:	return "COMPLETE_QF_OK";
+	case TRANSPORT_COMPLETE_QF_ERR:	return "COMPLETE_QF_ERR";
+	}
+
+	return "(?)";
+}
+
+static void target_append_str(char **str, const char *txt)
+{
+	char *prev = *str;
+
+	*str = *str ? kasprintf(GFP_ATOMIC, "%s,%s", *str, txt) :
+		kstrdup(txt, GFP_ATOMIC);
+	kfree(prev);
+}
+
+/*
+ * Convert a transport state bitmask into a string. The caller is
+ * responsible for freeing the returned pointer.
+ */
+static char *target_ts_to_str(u32 ts)
+{
+	char *str = NULL;
+
+	if (ts & CMD_T_ABORTED)
+		target_append_str(&str, "aborted");
+	if (ts & CMD_T_ACTIVE)
+		target_append_str(&str, "active");
+	if (ts & CMD_T_COMPLETE)
+		target_append_str(&str, "complete");
+	if (ts & CMD_T_SENT)
+		target_append_str(&str, "sent");
+	if (ts & CMD_T_STOP)
+		target_append_str(&str, "stop");
+	if (ts & CMD_T_FABRIC_STOP)
+		target_append_str(&str, "fabric_stop");
+
+	return str;
+}
+
+static const char *target_tmf_name(enum tcm_tmreq_table tmf)
+{
+	switch (tmf) {
+	case TMR_ABORT_TASK:		return "ABORT_TASK";
+	case TMR_ABORT_TASK_SET:	return "ABORT_TASK_SET";
+	case TMR_CLEAR_ACA:		return "CLEAR_ACA";
+	case TMR_CLEAR_TASK_SET:	return "CLEAR_TASK_SET";
+	case TMR_LUN_RESET:		return "LUN_RESET";
+	case TMR_TARGET_WARM_RESET:	return "TARGET_WARM_RESET";
+	case TMR_TARGET_COLD_RESET:	return "TARGET_COLD_RESET";
+	case TMR_UNKNOWN:		break;
+	}
+	return "(?)";
+}
+
+void target_show_cmd(const char *pfx, struct se_cmd *cmd)
+{
+	char *ts_str = target_ts_to_str(cmd->transport_state);
+	const u8 *cdb = cmd->t_task_cdb;
+	struct se_tmr_req *tmf = cmd->se_tmr_req;
+
+	if (!(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) {
+		pr_debug("%scmd %#02x:%#02x with tag %#llx dir %s i_state %d t_state %s len %d refcnt %d transport_state %s\n",
+			 pfx, cdb[0], cdb[1], cmd->tag,
+			 data_dir_name(cmd->data_direction),
+			 cmd->se_tfo->get_cmd_state(cmd),
+			 cmd_state_name(cmd->t_state), cmd->data_length,
+			 kref_read(&cmd->cmd_kref), ts_str);
+	} else {
+		pr_debug("%stmf %s with tag %#llx ref_task_tag %#llx i_state %d t_state %s refcnt %d transport_state %s\n",
+			 pfx, target_tmf_name(tmf->function), cmd->tag,
+			 tmf->ref_task_tag, cmd->se_tfo->get_cmd_state(cmd),
+			 cmd_state_name(cmd->t_state),
+			 kref_read(&cmd->cmd_kref), ts_str);
+	}
+	kfree(ts_str);
+}
+EXPORT_SYMBOL(target_show_cmd);
+
 /* target_sess_cmd_list_set_waiting - Flag all commands in
  *         sess_cmd_list to complete cmd_wait_comp.  Set
  *         sess_tearing_down so no more commands are queued.
@@ -2812,13 +2974,13 @@ __transport_wait_for_tasks(struct se_cmd *cmd, bool fabric_stop,
 
 	cmd->transport_state |= CMD_T_STOP;
 
-	pr_debug("wait_for_tasks: Stopping %p ITT: 0x%08llx i_state: %d,"
-		 " t_state: %d, CMD_T_STOP\n", cmd, cmd->tag,
-		 cmd->se_tfo->get_cmd_state(cmd), cmd->t_state);
+	target_show_cmd("wait_for_tasks: Stopping ", cmd);
 
 	spin_unlock_irqrestore(&cmd->t_state_lock, *flags);
 
-	wait_for_completion(&cmd->t_transport_stop_comp);
+	while (!wait_for_completion_timeout(&cmd->t_transport_stop_comp,
+					    180 * HZ))
+		target_show_cmd("wait for tasks: ", cmd);
 
 	spin_lock_irqsave(&cmd->t_state_lock, *flags);
 	cmd->transport_state &= ~(CMD_T_ACTIVE | CMD_T_STOP);
@@ -3201,6 +3363,7 @@ static void target_tmr_work(struct work_struct *work)
 	cmd->se_tfo->queue_tm_rsp(cmd);
 
 check_stop:
+	transport_lun_remove_cmd(cmd);
 	transport_cmd_check_stop_to_fabric(cmd);
 }
 
@@ -3223,6 +3386,7 @@ int transport_generic_handle_tmr(
 		pr_warn_ratelimited("handle_tmr caught CMD_T_ABORTED TMR %d"
 			"ref_tag: %llu tag: %llu\n", cmd->se_tmr_req->function,
 			cmd->se_tmr_req->ref_task_tag, cmd->tag);
+		transport_lun_remove_cmd(cmd);
 		transport_cmd_check_stop_to_fabric(cmd);
 		return 0;
 	}

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index beb5f09..80ee130 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c

@@ -87,6 +87,8 @@
 /* Default maximum of the global data blocks(512K * PAGE_SIZE) */
 #define TCMU_GLOBAL_MAX_BLOCKS (512 * 1024)
 
+static u8 tcmu_kern_cmd_reply_supported;
+
 static struct device *tcmu_root_device;
 
 struct tcmu_hba {
@@ -95,6 +97,13 @@ struct tcmu_hba {
 
 #define TCMU_CONFIG_LEN 256
 
+struct tcmu_nl_cmd {
+	/* wake up thread waiting for reply */
+	struct completion complete;
+	int cmd;
+	int status;
+};
+
 struct tcmu_dev {
 	struct list_head node;
 	struct kref kref;
@@ -135,6 +144,11 @@ struct tcmu_dev {
 	struct timer_list timeout;
 	unsigned int cmd_time_out;
 
+	spinlock_t nl_cmd_lock;
+	struct tcmu_nl_cmd curr_nl_cmd;
+	/* wake up threads waiting on curr_nl_cmd */
+	wait_queue_head_t nl_cmd_wq;
+
 	char dev_config[TCMU_CONFIG_LEN];
 };
 
@@ -178,16 +192,128 @@ static const struct genl_multicast_group tcmu_mcgrps[] = {
 	[TCMU_MCGRP_CONFIG] = { .name = "config", },
 };
 
+static const struct nla_policy tcmu_attr_policy[TCMU_ATTR_MAX+1] = {
+	[TCMU_ATTR_DEVICE]	= { .type = NLA_STRING },	/* uio name */
+	[TCMU_ATTR_MINOR]	= { .type = NLA_U32 },	/* uio minor */
+	[TCMU_ATTR_CMD_STATUS]	= { .type = NLA_S32 },	/* reply status */
+	[TCMU_ATTR_DEVICE_ID]	= { .type = NLA_U32 },	/* se_dev dev_index */
+	[TCMU_ATTR_SUPP_KERN_CMD_REPLY] = { .type = NLA_U8 }, /* feature flag */
+};
+
+static int tcmu_genl_cmd_done(struct genl_info *info, int completed_cmd)
+{
+	/* Record the status userspace replied with and wake the waiting thread. */
+	struct se_device *dev;
+	struct tcmu_dev *udev;
+	struct tcmu_nl_cmd *nl_cmd;
+	int dev_id, rc, ret = 0;
+	bool is_removed = (completed_cmd == TCMU_CMD_REMOVED_DEVICE);
+
+	if (!info->attrs[TCMU_ATTR_CMD_STATUS] ||
+	    !info->attrs[TCMU_ATTR_DEVICE_ID]) {
+		printk(KERN_ERR "TCMU_ATTR_CMD_STATUS or TCMU_ATTR_DEVICE_ID not set, doing nothing\n");
+		return -EINVAL;
+	}
+
+	dev_id = nla_get_u32(info->attrs[TCMU_ATTR_DEVICE_ID]);
+	rc = nla_get_s32(info->attrs[TCMU_ATTR_CMD_STATUS]);
+
+	dev = target_find_device(dev_id, !is_removed);
+	if (!dev) {
+		printk(KERN_ERR "tcmu nl cmd %u/%d completion could not find device with dev id %u.\n",
+		       completed_cmd, rc, dev_id);
+		return -ENODEV;
+	}
+	udev = TCMU_DEV(dev);
+
+	/* curr_nl_cmd is shared with the sender; touch it only under the lock. */
+	spin_lock(&udev->nl_cmd_lock);
+	nl_cmd = &udev->curr_nl_cmd;
+	pr_debug("genl cmd done got id %d curr %d done %d rc %d\n", dev_id,
+		 nl_cmd->cmd, completed_cmd, rc);
+
+	if (nl_cmd->cmd != completed_cmd) {
+		printk(KERN_ERR "Mismatched commands (Expecting reply for %d. Current %d).\n",
+		       completed_cmd, nl_cmd->cmd);
+		ret = -EINVAL;
+	} else {
+		nl_cmd->status = rc;
+	}
+	spin_unlock(&udev->nl_cmd_lock);
+	if (!is_removed)
+		target_undepend_item(&dev->dev_group.cg_item);
+	if (!ret)
+		complete(&nl_cmd->complete);
+	return ret;
+}
+
+static int tcmu_genl_rm_dev_done(struct sk_buff *skb, struct genl_info *info)
+{
+	return tcmu_genl_cmd_done(info, TCMU_CMD_REMOVED_DEVICE);
+}
+
+static int tcmu_genl_add_dev_done(struct sk_buff *skb, struct genl_info *info)
+{
+	return tcmu_genl_cmd_done(info, TCMU_CMD_ADDED_DEVICE);
+}
+
+static int tcmu_genl_reconfig_dev_done(struct sk_buff *skb,
+				       struct genl_info *info)
+{
+	return tcmu_genl_cmd_done(info, TCMU_CMD_RECONFIG_DEVICE);
+}
+
+static int tcmu_genl_set_features(struct sk_buff *skb, struct genl_info *info)
+{
+	if (info->attrs[TCMU_ATTR_SUPP_KERN_CMD_REPLY]) {
+		tcmu_kern_cmd_reply_supported  =
+			nla_get_u8(info->attrs[TCMU_ATTR_SUPP_KERN_CMD_REPLY]);
+		printk(KERN_INFO "tcmu daemon: command reply support %u.\n",
+		       tcmu_kern_cmd_reply_supported);
+	}
+
+	return 0;
+}
+
+static const struct genl_ops tcmu_genl_ops[] = {
+	{
+		.cmd	= TCMU_CMD_SET_FEATURES,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= tcmu_attr_policy,
+		.doit	= tcmu_genl_set_features,
+	},
+	{
+		.cmd	= TCMU_CMD_ADDED_DEVICE_DONE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= tcmu_attr_policy,
+		.doit	= tcmu_genl_add_dev_done,
+	},
+	{
+		.cmd	= TCMU_CMD_REMOVED_DEVICE_DONE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= tcmu_attr_policy,
+		.doit	= tcmu_genl_rm_dev_done,
+	},
+	{
+		.cmd	= TCMU_CMD_RECONFIG_DEVICE_DONE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= tcmu_attr_policy,
+		.doit	= tcmu_genl_reconfig_dev_done,
+	},
+};
+
 /* Our generic netlink family */
 static struct genl_family tcmu_genl_family __ro_after_init = {
 	.module = THIS_MODULE,
 	.hdrsize = 0,
 	.name = "TCM-USER",
-	.version = 1,
+	.version = 2,
 	.maxattr = TCMU_ATTR_MAX,
 	.mcgrps = tcmu_mcgrps,
 	.n_mcgrps = ARRAY_SIZE(tcmu_mcgrps),
 	.netnsok = true,
+	.ops = tcmu_genl_ops,
+	.n_ops = ARRAY_SIZE(tcmu_genl_ops),
 };
 
 #define tcmu_cmd_set_dbi_cur(cmd, index) ((cmd)->dbi_cur = (index))
@@ -216,7 +342,6 @@ static inline bool tcmu_get_empty_block(struct tcmu_dev *udev,
 
 	page = radix_tree_lookup(&udev->data_blocks, dbi);
 	if (!page) {
-
 		if (atomic_add_return(1, &global_db_count) >
 					TCMU_GLOBAL_MAX_BLOCKS) {
 			atomic_dec(&global_db_count);
@@ -226,14 +351,11 @@ static inline bool tcmu_get_empty_block(struct tcmu_dev *udev,
 		/* try to get new page from the mm */
 		page = alloc_page(GFP_KERNEL);
 		if (!page)
-			return false;
+			goto err_alloc;
 
 		ret = radix_tree_insert(&udev->data_blocks, dbi, page);
-		if (ret) {
-			__free_page(page);
-			return false;
-		}
-
+		if (ret)
+			goto err_insert;
 	}
 
 	if (dbi > udev->dbi_max)
@@ -243,6 +365,11 @@ static inline bool tcmu_get_empty_block(struct tcmu_dev *udev,
 	tcmu_cmd_set_dbi(tcmu_cmd, dbi);
 
 	return true;
+err_insert:
+	__free_page(page);
+err_alloc:
+	atomic_dec(&global_db_count);
+	return false;
 }
 
 static bool tcmu_get_empty_blocks(struct tcmu_dev *udev,
@@ -401,7 +528,7 @@ static inline size_t get_block_offset_user(struct tcmu_dev *dev,
 		DATA_BLOCK_SIZE - remaining;
 }
 
-static inline size_t iov_tail(struct tcmu_dev *udev, struct iovec *iov)
+static inline size_t iov_tail(struct iovec *iov)
 {
 	return (size_t)iov->iov_base + iov->iov_len;
 }
@@ -437,10 +564,10 @@ static int scatter_data_area(struct tcmu_dev *udev,
 			to_offset = get_block_offset_user(udev, dbi,
 					block_remaining);
 			offset = DATA_BLOCK_SIZE - block_remaining;
-			to = (void *)(unsigned long)to + offset;
+			to += offset;
 
 			if (*iov_cnt != 0 &&
-			    to_offset == iov_tail(udev, *iov)) {
+			    to_offset == iov_tail(*iov)) {
 				(*iov)->iov_len += copy_bytes;
 			} else {
 				new_iov(iov, iov_cnt, udev);
@@ -510,7 +637,7 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
 			copy_bytes = min_t(size_t, sg_remaining,
 					block_remaining);
 			offset = DATA_BLOCK_SIZE - block_remaining;
-			from = (void *)(unsigned long)from + offset;
+			from += offset;
 			tcmu_flush_dcache_range(from, copy_bytes);
 			memcpy(to + sg->length - sg_remaining, from,
 					copy_bytes);
@@ -596,10 +723,7 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
 		}
 	}
 
-	if (!tcmu_get_empty_blocks(udev, cmd))
-		return false;
-
-	return true;
+	return tcmu_get_empty_blocks(udev, cmd);
 }
 
 static inline size_t tcmu_cmd_get_base_cmd_size(size_t iov_cnt)
@@ -699,25 +823,24 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 		size_t pad_size = head_to_end(cmd_head, udev->cmdr_size);
 
 		entry = (void *) mb + CMDR_OFF + cmd_head;
-		tcmu_flush_dcache_range(entry, sizeof(*entry));
 		tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_PAD);
 		tcmu_hdr_set_len(&entry->hdr.len_op, pad_size);
 		entry->hdr.cmd_id = 0; /* not used for PAD */
 		entry->hdr.kflags = 0;
 		entry->hdr.uflags = 0;
+		tcmu_flush_dcache_range(entry, sizeof(*entry));
 
 		UPDATE_HEAD(mb->cmd_head, pad_size, udev->cmdr_size);
+		tcmu_flush_dcache_range(mb, sizeof(*mb));
 
 		cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
 		WARN_ON(cmd_head != 0);
 	}
 
 	entry = (void *) mb + CMDR_OFF + cmd_head;
-	tcmu_flush_dcache_range(entry, sizeof(*entry));
+	memset(entry, 0, command_size);
 	tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_CMD);
 	entry->hdr.cmd_id = tcmu_cmd->cmd_id;
-	entry->hdr.kflags = 0;
-	entry->hdr.uflags = 0;
 
 	/* Handle allocating space from the data area */
 	tcmu_cmd_reset_dbi_cur(tcmu_cmd);
@@ -736,11 +859,10 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 	}
 	entry->req.iov_cnt = iov_cnt;
-	entry->req.iov_dif_cnt = 0;
 
 	/* Handle BIDI commands */
+	iov_cnt = 0;
 	if (se_cmd->se_cmd_flags & SCF_BIDI) {
-		iov_cnt = 0;
 		iov++;
 		ret = scatter_data_area(udev, tcmu_cmd,
 					se_cmd->t_bidi_data_sg,
@@ -753,8 +875,8 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 			pr_err("tcmu: alloc and scatter bidi data failed\n");
 			return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 		}
-		entry->req.iov_bidi_cnt = iov_cnt;
 	}
+	entry->req.iov_bidi_cnt = iov_cnt;
 
 	/*
 	 * Recalaulate the command's base size and size according
@@ -830,8 +952,7 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *
 			cmd->se_cmd);
 		entry->rsp.scsi_status = SAM_STAT_CHECK_CONDITION;
 	} else if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
-		memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer,
-			       se_cmd->scsi_sense_length);
+		transport_copy_sense_to_cmd(se_cmd, entry->rsp.sense_buffer);
 	} else if (se_cmd->se_cmd_flags & SCF_BIDI) {
 		/* Get Data-In buffer before clean up */
 		gather_data_area(udev, cmd, true);
@@ -989,6 +1110,9 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
 	setup_timer(&udev->timeout, tcmu_device_timedout,
 		(unsigned long)udev);
 
+	init_waitqueue_head(&udev->nl_cmd_wq);
+	spin_lock_init(&udev->nl_cmd_lock);
+
 	return &udev->se_dev;
 }
 
@@ -1140,6 +1264,7 @@ static int tcmu_open(struct uio_info *info, struct inode *inode)
 		return -EBUSY;
 
 	udev->inode = inode;
+	kref_get(&udev->kref);
 
 	pr_debug("open\n");
 
@@ -1171,12 +1296,59 @@ static int tcmu_release(struct uio_info *info, struct inode *inode)
 	clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags);
 
 	pr_debug("close\n");
-	/* release ref from configure */
+	/* release ref from open */
 	kref_put(&udev->kref, tcmu_dev_kref_release);
 	return 0;
 }
 
-static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name, int minor)
+static void tcmu_init_genl_cmd_reply(struct tcmu_dev *udev, int cmd)
+{
+	struct tcmu_nl_cmd *nl_cmd = &udev->curr_nl_cmd;
+
+	if (!tcmu_kern_cmd_reply_supported)
+		return;
+relock:
+	spin_lock(&udev->nl_cmd_lock);
+
+	if (nl_cmd->cmd != TCMU_CMD_UNSPEC) {
+		spin_unlock(&udev->nl_cmd_lock);
+		pr_debug("sleeping for open nl cmd\n");
+		wait_event(udev->nl_cmd_wq, (nl_cmd->cmd == TCMU_CMD_UNSPEC));
+		goto relock;
+	}
+
+	memset(nl_cmd, 0, sizeof(*nl_cmd));
+	nl_cmd->cmd = cmd;
+	init_completion(&nl_cmd->complete);
+
+	spin_unlock(&udev->nl_cmd_lock);
+}
+
+static int tcmu_wait_genl_cmd_reply(struct tcmu_dev *udev)
+{
+	/* Wait for the reply to complete, free the slot, return its status. */
+	struct tcmu_nl_cmd *nl_cmd = &udev->curr_nl_cmd;
+	int ret;
+
+	if (!tcmu_kern_cmd_reply_supported)
+		return 0;
+
+	pr_debug("sleeping for nl reply\n");
+	wait_for_completion(&nl_cmd->complete);
+
+	spin_lock(&udev->nl_cmd_lock);
+	nl_cmd->cmd = TCMU_CMD_UNSPEC;
+	ret = nl_cmd->status;
+	nl_cmd->status = 0;
+	spin_unlock(&udev->nl_cmd_lock);
+	/* Slot is free again: wake senders parked in tcmu_init_genl_cmd_reply(). */
+	wake_up_all(&udev->nl_cmd_wq);
+
+	return ret;
+}
+
+static int tcmu_netlink_event(struct tcmu_dev *udev, enum tcmu_genl_cmd cmd,
+			      int reconfig_attr, const void *reconfig_data)
 {
 	struct sk_buff *skb;
 	void *msg_header;
@@ -1190,22 +1362,51 @@ static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name, int mino
 	if (!msg_header)
 		goto free_skb;
 
-	ret = nla_put_string(skb, TCMU_ATTR_DEVICE, name);
+	ret = nla_put_string(skb, TCMU_ATTR_DEVICE, udev->uio_info.name);
 	if (ret < 0)
 		goto free_skb;
 
-	ret = nla_put_u32(skb, TCMU_ATTR_MINOR, minor);
+	ret = nla_put_u32(skb, TCMU_ATTR_MINOR, udev->uio_info.uio_dev->minor);
 	if (ret < 0)
 		goto free_skb;
 
+	ret = nla_put_u32(skb, TCMU_ATTR_DEVICE_ID, udev->se_dev.dev_index);
+	if (ret < 0)
+		goto free_skb;
+
+	if (cmd == TCMU_CMD_RECONFIG_DEVICE) {
+		switch (reconfig_attr) {
+		case TCMU_ATTR_DEV_CFG:
+			ret = nla_put_string(skb, reconfig_attr, reconfig_data);
+			break;
+		case TCMU_ATTR_DEV_SIZE:
+			ret = nla_put_u64_64bit(skb, reconfig_attr,
+						*((u64 *)reconfig_data),
+						TCMU_ATTR_PAD);
+			break;
+		case TCMU_ATTR_WRITECACHE:
+			ret = nla_put_u8(skb, reconfig_attr,
+					  *((u8 *)reconfig_data));
+			break;
+		default:
+			BUG();
+		}
+
+		if (ret < 0)
+			goto free_skb;
+	}
+
 	genlmsg_end(skb, msg_header);
 
+	tcmu_init_genl_cmd_reply(udev, cmd);
+
 	ret = genlmsg_multicast_allns(&tcmu_genl_family, skb, 0,
 				TCMU_MCGRP_CONFIG, GFP_KERNEL);
-
 	/* We don't care if no one is listening */
 	if (ret == -ESRCH)
 		ret = 0;
+	if (!ret)
+		ret = tcmu_wait_genl_cmd_reply(udev);
 
 	return ret;
 free_skb:
@@ -1213,19 +1414,14 @@ static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name, int mino
 	return ret;
 }
 
-static int tcmu_configure_device(struct se_device *dev)
+static int tcmu_update_uio_info(struct tcmu_dev *udev)
 {
-	struct tcmu_dev *udev = TCMU_DEV(dev);
 	struct tcmu_hba *hba = udev->hba->hba_ptr;
 	struct uio_info *info;
-	struct tcmu_mailbox *mb;
-	size_t size;
-	size_t used;
-	int ret = 0;
+	size_t size, used;
 	char *str;
 
 	info = &udev->uio_info;
-
 	size = snprintf(NULL, 0, "tcm-user/%u/%s/%s", hba->host_id, udev->name,
 			udev->dev_config);
 	size += 1; /* for \0 */
@@ -1234,12 +1430,27 @@ static int tcmu_configure_device(struct se_device *dev)
 		return -ENOMEM;
 
 	used = snprintf(str, size, "tcm-user/%u/%s", hba->host_id, udev->name);
-
 	if (udev->dev_config[0])
 		snprintf(str + used, size - used, "/%s", udev->dev_config);
 
 	info->name = str;
 
+	return 0;
+}
+
+static int tcmu_configure_device(struct se_device *dev)
+{
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+	struct uio_info *info;
+	struct tcmu_mailbox *mb;
+	int ret = 0;
+
+	ret = tcmu_update_uio_info(udev);
+	if (ret)
+		return ret;
+
+	info = &udev->uio_info;
+
 	udev->mb_addr = vzalloc(CMDR_SIZE);
 	if (!udev->mb_addr) {
 		ret = -ENOMEM;
@@ -1290,6 +1501,8 @@ static int tcmu_configure_device(struct se_device *dev)
 	/* Other attributes can be configured in userspace */
 	if (!dev->dev_attrib.hw_max_sectors)
 		dev->dev_attrib.hw_max_sectors = 128;
+	if (!dev->dev_attrib.emulate_write_cache)
+		dev->dev_attrib.emulate_write_cache = 0;
 	dev->dev_attrib.hw_queue_depth = 128;
 
 	/*
@@ -1298,8 +1511,7 @@ static int tcmu_configure_device(struct se_device *dev)
 	 */
 	kref_get(&udev->kref);
 
-	ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name,
-				 udev->uio_info.uio_dev->minor);
+	ret = tcmu_netlink_event(udev, TCMU_CMD_ADDED_DEVICE, 0, NULL);
 	if (ret)
 		goto err_netlink;
 
@@ -1355,6 +1567,14 @@ static void tcmu_blocks_release(struct tcmu_dev *udev)
 static void tcmu_free_device(struct se_device *dev)
 {
 	struct tcmu_dev *udev = TCMU_DEV(dev);
+
+	/* release ref from init */
+	kref_put(&udev->kref, tcmu_dev_kref_release);
+}
+
+static void tcmu_destroy_device(struct se_device *dev)
+{
+	struct tcmu_dev *udev = TCMU_DEV(dev);
 	struct tcmu_cmd *cmd;
 	bool all_expired = true;
 	int i;
@@ -1379,14 +1599,11 @@ static void tcmu_free_device(struct se_device *dev)
 
 	tcmu_blocks_release(udev);
 
-	if (tcmu_dev_configured(udev)) {
-		tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name,
-				   udev->uio_info.uio_dev->minor);
+	tcmu_netlink_event(udev, TCMU_CMD_REMOVED_DEVICE, 0, NULL);
 
-		uio_unregister_device(&udev->uio_info);
-	}
+	uio_unregister_device(&udev->uio_info);
 
-	/* release ref from init */
+	/* release ref from configure */
 	kref_put(&udev->kref, tcmu_dev_kref_release);
 }
 
@@ -1546,6 +1763,129 @@ static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *pag
 }
 CONFIGFS_ATTR(tcmu_, cmd_time_out);
 
+static ssize_t tcmu_dev_config_show(struct config_item *item, char *page)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+						struct se_dev_attrib, da_group);
+	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
+
+	return snprintf(page, PAGE_SIZE, "%s\n", udev->dev_config);
+}
+
+static ssize_t tcmu_dev_config_store(struct config_item *item, const char *page,
+				     size_t count)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+						struct se_dev_attrib, da_group);
+	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
+	int ret, len;
+
+	len = strlen(page);
+	if (!len || len > TCMU_CONFIG_LEN - 1)
+		return -EINVAL;
+
+	/* Check if device has been configured before */
+	if (tcmu_dev_configured(udev)) {
+		ret = tcmu_netlink_event(udev, TCMU_CMD_RECONFIG_DEVICE,
+					 TCMU_ATTR_DEV_CFG, page);
+		if (ret) {
+			pr_err("Unable to reconfigure device\n");
+			return ret;
+		}
+		strlcpy(udev->dev_config, page, TCMU_CONFIG_LEN);
+
+		ret = tcmu_update_uio_info(udev);
+		if (ret)
+			return ret;
+		return count;
+	}
+	strlcpy(udev->dev_config, page, TCMU_CONFIG_LEN);
+
+	return count;
+}
+CONFIGFS_ATTR(tcmu_, dev_config);
+
+static ssize_t tcmu_dev_size_show(struct config_item *item, char *page)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+						struct se_dev_attrib, da_group);
+	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
+
+	return snprintf(page, PAGE_SIZE, "%zu\n", udev->dev_size);
+}
+
+static ssize_t tcmu_dev_size_store(struct config_item *item, const char *page,
+				   size_t count)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+						struct se_dev_attrib, da_group);
+	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
+	u64 val;
+	int ret;
+
+	ret = kstrtou64(page, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	/* Check if device has been configured before */
+	if (tcmu_dev_configured(udev)) {
+		ret = tcmu_netlink_event(udev, TCMU_CMD_RECONFIG_DEVICE,
+					 TCMU_ATTR_DEV_SIZE, &val);
+		if (ret) {
+			pr_err("Unable to reconfigure device\n");
+			return ret;
+		}
+	}
+	udev->dev_size = val;
+	return count;
+}
+CONFIGFS_ATTR(tcmu_, dev_size);
+
+static ssize_t tcmu_emulate_write_cache_show(struct config_item *item,
+					     char *page)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+					struct se_dev_attrib, da_group);
+
+	return snprintf(page, PAGE_SIZE, "%i\n", da->emulate_write_cache);
+}
+
+static ssize_t tcmu_emulate_write_cache_store(struct config_item *item,
+					      const char *page, size_t count)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+					struct se_dev_attrib, da_group);
+	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
+	u8 val;
+	int ret;
+
+	ret = kstrtou8(page, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	/* Check if device has been configured before */
+	if (tcmu_dev_configured(udev)) {
+		ret = tcmu_netlink_event(udev, TCMU_CMD_RECONFIG_DEVICE,
+					 TCMU_ATTR_WRITECACHE, &val);
+		if (ret) {
+			pr_err("Unable to reconfigure device\n");
+			return ret;
+		}
+	}
+
+	da->emulate_write_cache = val;
+	return count;
+}
+CONFIGFS_ATTR(tcmu_, emulate_write_cache);
+
+static struct configfs_attribute *tcmu_attrib_attrs[] = {
+	&tcmu_attr_cmd_time_out,
+	&tcmu_attr_dev_config,
+	&tcmu_attr_dev_size,
+	&tcmu_attr_emulate_write_cache,
+	NULL,
+};
+
 static struct configfs_attribute **tcmu_attrs;
 
 static struct target_backend_ops tcmu_ops = {
@@ -1556,6 +1896,7 @@ static struct target_backend_ops tcmu_ops = {
 	.detach_hba		= tcmu_detach_hba,
 	.alloc_device		= tcmu_alloc_device,
 	.configure_device	= tcmu_configure_device,
+	.destroy_device		= tcmu_destroy_device,
 	.free_device		= tcmu_free_device,
 	.parse_cdb		= tcmu_parse_cdb,
 	.set_configfs_dev_params = tcmu_set_configfs_dev_params,
@@ -1573,7 +1914,7 @@ static int unmap_thread_fn(void *data)
 	struct page *page;
 	int i;
 
-	while (1) {
+	while (!kthread_should_stop()) {
 		DEFINE_WAIT(__wait);
 
 		prepare_to_wait(&unmap_wait, &__wait, TASK_INTERRUPTIBLE);
@@ -1645,7 +1986,7 @@ static int unmap_thread_fn(void *data)
 
 static int __init tcmu_module_init(void)
 {
-	int ret, i, len = 0;
+	int ret, i, k, len = 0;
 
 	BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0);
 
@@ -1670,7 +2011,10 @@ static int __init tcmu_module_init(void)
 	for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) {
 		len += sizeof(struct configfs_attribute *);
 	}
-	len += sizeof(struct configfs_attribute *) * 2;
+	for (i = 0; tcmu_attrib_attrs[i] != NULL; i++) {
+		len += sizeof(struct configfs_attribute *);
+	}
+	len += sizeof(struct configfs_attribute *);
 
 	tcmu_attrs = kzalloc(len, GFP_KERNEL);
 	if (!tcmu_attrs) {
@@ -1681,7 +2025,10 @@ static int __init tcmu_module_init(void)
 	for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) {
 		tcmu_attrs[i] = passthrough_attrib_attrs[i];
 	}
-	tcmu_attrs[i] = &tcmu_attr_cmd_time_out;
+	for (k = 0; tcmu_attrib_attrs[k] != NULL; k++) {
+		tcmu_attrs[i] = tcmu_attrib_attrs[k];
+		i++;
+	}
 	tcmu_ops.tb_dev_attrib_attrs = tcmu_attrs;
 
 	ret = transport_backend_register(&tcmu_ops);

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index cac5a20..9ee89e0 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c

@@ -40,6 +40,8 @@
 
 static struct workqueue_struct *xcopy_wq = NULL;
 
+static sense_reason_t target_parse_xcopy_cmd(struct xcopy_op *xop);
+
 static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf)
 {
 	int off = 0;
@@ -53,48 +55,60 @@ static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf)
 	return 0;
 }
 
-static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
-					struct se_device **found_dev)
+struct xcopy_dev_search_info {
+	const unsigned char *dev_wwn;
+	struct se_device *found_dev;
+};
+
+static int target_xcopy_locate_se_dev_e4_iter(struct se_device *se_dev,
+					      void *data)
 {
-	struct se_device *se_dev;
+	struct xcopy_dev_search_info *info = data;
 	unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN];
 	int rc;
 
-	mutex_lock(&g_device_mutex);
-	list_for_each_entry(se_dev, &g_device_list, g_dev_node) {
-
-		if (!se_dev->dev_attrib.emulate_3pc)
-			continue;
-
-		memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN);
-		target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]);
-
-		rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN);
-		if (rc != 0)
-			continue;
-
-		*found_dev = se_dev;
-		pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev);
-
-		rc = target_depend_item(&se_dev->dev_group.cg_item);
-		if (rc != 0) {
-			pr_err("configfs_depend_item attempt failed:"
-				" %d for se_dev: %p\n", rc, se_dev);
-			mutex_unlock(&g_device_mutex);
-			return rc;
-		}
-
-		pr_debug("Called configfs_depend_item for se_dev: %p"
-			" se_dev->se_dev_group: %p\n", se_dev,
-			&se_dev->dev_group);
-
-		mutex_unlock(&g_device_mutex);
+	if (!se_dev->dev_attrib.emulate_3pc)
 		return 0;
-	}
-	mutex_unlock(&g_device_mutex);
 
-	pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n");
-	return -EINVAL;
+	memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN);
+	target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]);
+
+	rc = memcmp(&tmp_dev_wwn[0], info->dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN);
+	if (rc != 0)
+		return 0;
+
+	info->found_dev = se_dev;
+	pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev);
+
+	rc = target_depend_item(&se_dev->dev_group.cg_item);
+	if (rc != 0) {
+		pr_err("configfs_depend_item attempt failed: %d for se_dev: %p\n",
+		       rc, se_dev);
+		return rc;
+	}
+
+	pr_debug("Called configfs_depend_item for se_dev: %p se_dev->se_dev_group: %p\n",
+		 se_dev, &se_dev->dev_group);
+	return 1;
+}
+
+static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
+					struct se_device **found_dev)
+{
+	struct xcopy_dev_search_info info;
+	int ret;
+
+	memset(&info, 0, sizeof(info));
+	info.dev_wwn = dev_wwn;
+
+	ret = target_for_each_device(target_xcopy_locate_se_dev_e4_iter, &info);
+	if (ret == 1) {
+		*found_dev = info.found_dev;
+		return 0;
+	} else {
+		pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n");
+		return -EINVAL;
+	}
 }
 
 static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop,
@@ -311,9 +325,7 @@ static int target_xcopy_parse_segdesc_02(struct se_cmd *se_cmd, struct xcopy_op
 		(unsigned long long)xop->dst_lba);
 
 	if (dc != 0) {
-		xop->dbl = (desc[29] & 0xff) << 16;
-		xop->dbl |= (desc[30] & 0xff) << 8;
-		xop->dbl |= desc[31] & 0xff;
+		xop->dbl = get_unaligned_be24(&desc[29]);
 
 		pr_debug("XCOPY seg desc 0x02: DC=1 w/ dbl: %u\n", xop->dbl);
 	}
@@ -781,13 +793,24 @@ static int target_xcopy_write_destination(
 static void target_xcopy_do_work(struct work_struct *work)
 {
 	struct xcopy_op *xop = container_of(work, struct xcopy_op, xop_work);
-	struct se_device *src_dev = xop->src_dev, *dst_dev = xop->dst_dev;
 	struct se_cmd *ec_cmd = xop->xop_se_cmd;
-	sector_t src_lba = xop->src_lba, dst_lba = xop->dst_lba, end_lba;
+	struct se_device *src_dev, *dst_dev;
+	sector_t src_lba, dst_lba, end_lba;
 	unsigned int max_sectors;
-	int rc;
-	unsigned short nolb = xop->nolb, cur_nolb, max_nolb, copied_nolb = 0;
+	int rc = 0;
+	unsigned short nolb, cur_nolb, max_nolb, copied_nolb = 0;
 
+	if (target_parse_xcopy_cmd(xop) != TCM_NO_SENSE)
+		goto err_free;
+
+	if (WARN_ON_ONCE(!xop->src_dev) || WARN_ON_ONCE(!xop->dst_dev))
+		goto err_free;
+
+	src_dev = xop->src_dev;
+	dst_dev = xop->dst_dev;
+	src_lba = xop->src_lba;
+	dst_lba = xop->dst_lba;
+	nolb = xop->nolb;
 	end_lba = src_lba + nolb;
 	/*
 	 * Break up XCOPY I/O into hw_max_sectors sized I/O based on the
@@ -855,6 +878,8 @@ static void target_xcopy_do_work(struct work_struct *work)
 
 out:
 	xcopy_pt_undepend_remotedev(xop);
+
+err_free:
 	kfree(xop);
 	/*
 	 * Don't override an error scsi status if it has already been set
@@ -867,48 +892,22 @@ static void target_xcopy_do_work(struct work_struct *work)
 	target_complete_cmd(ec_cmd, ec_cmd->scsi_status);
 }
 
-sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
+/*
+ * Returns TCM_NO_SENSE upon success or a sense code != TCM_NO_SENSE if parsing
+ * fails.
+ */
+static sense_reason_t target_parse_xcopy_cmd(struct xcopy_op *xop)
 {
-	struct se_device *dev = se_cmd->se_dev;
-	struct xcopy_op *xop = NULL;
+	struct se_cmd *se_cmd = xop->xop_se_cmd;
 	unsigned char *p = NULL, *seg_desc;
-	unsigned int list_id, list_id_usage, sdll, inline_dl, sa;
+	unsigned int list_id, list_id_usage, sdll, inline_dl;
 	sense_reason_t ret = TCM_INVALID_PARAMETER_LIST;
 	int rc;
 	unsigned short tdll;
 
-	if (!dev->dev_attrib.emulate_3pc) {
-		pr_err("EXTENDED_COPY operation explicitly disabled\n");
-		return TCM_UNSUPPORTED_SCSI_OPCODE;
-	}
-
-	sa = se_cmd->t_task_cdb[1] & 0x1f;
-	if (sa != 0x00) {
-		pr_err("EXTENDED_COPY(LID4) not supported\n");
-		return TCM_UNSUPPORTED_SCSI_OPCODE;
-	}
-
-	if (se_cmd->data_length == 0) {
-		target_complete_cmd(se_cmd, SAM_STAT_GOOD);
-		return TCM_NO_SENSE;
-	}
-	if (se_cmd->data_length < XCOPY_HDR_LEN) {
-		pr_err("XCOPY parameter truncation: length %u < hdr_len %u\n",
-				se_cmd->data_length, XCOPY_HDR_LEN);
-		return TCM_PARAMETER_LIST_LENGTH_ERROR;
-	}
-
-	xop = kzalloc(sizeof(struct xcopy_op), GFP_KERNEL);
-	if (!xop) {
-		pr_err("Unable to allocate xcopy_op\n");
-		return TCM_OUT_OF_RESOURCES;
-	}
-	xop->xop_se_cmd = se_cmd;
-
 	p = transport_kmap_data_sg(se_cmd);
 	if (!p) {
 		pr_err("transport_kmap_data_sg() failed in target_do_xcopy\n");
-		kfree(xop);
 		return TCM_OUT_OF_RESOURCES;
 	}
 
@@ -977,18 +976,57 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 	pr_debug("XCOPY: Processed %d target descriptors, length: %u\n", rc,
 				rc * XCOPY_TARGET_DESC_LEN);
 	transport_kunmap_data_sg(se_cmd);
-
-	INIT_WORK(&xop->xop_work, target_xcopy_do_work);
-	queue_work(xcopy_wq, &xop->xop_work);
 	return TCM_NO_SENSE;
 
 out:
 	if (p)
 		transport_kunmap_data_sg(se_cmd);
-	kfree(xop);
 	return ret;
 }
 
+sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
+{
+	struct se_device *dev = se_cmd->se_dev;
+	struct xcopy_op *xop;
+	unsigned int sa;
+
+	if (!dev->dev_attrib.emulate_3pc) {
+		pr_err("EXTENDED_COPY operation explicitly disabled\n");
+		return TCM_UNSUPPORTED_SCSI_OPCODE;
+	}
+
+	sa = se_cmd->t_task_cdb[1] & 0x1f;
+	if (sa != 0x00) {
+		pr_err("EXTENDED_COPY(LID4) not supported\n");
+		return TCM_UNSUPPORTED_SCSI_OPCODE;
+	}
+
+	if (se_cmd->data_length == 0) {
+		target_complete_cmd(se_cmd, SAM_STAT_GOOD);
+		return TCM_NO_SENSE;
+	}
+	if (se_cmd->data_length < XCOPY_HDR_LEN) {
+		pr_err("XCOPY parameter truncation: length %u < hdr_len %u\n",
+				se_cmd->data_length, XCOPY_HDR_LEN);
+		return TCM_PARAMETER_LIST_LENGTH_ERROR;
+	}
+
+	xop = kzalloc(sizeof(struct xcopy_op), GFP_KERNEL);
+	if (!xop)
+		goto err;
+	xop->xop_se_cmd = se_cmd;
+	INIT_WORK(&xop->xop_work, target_xcopy_do_work);
+	if (WARN_ON_ONCE(!queue_work(xcopy_wq, &xop->xop_work)))
+		goto free;
+	return TCM_NO_SENSE;
+
+free:
+	kfree(xop);
+
+err:
+	return TCM_OUT_OF_RESOURCES;
+}
+
 static sense_reason_t target_rcr_operating_parameters(struct se_cmd *se_cmd)
 {
 	unsigned char *p;

diff --git a/drivers/thermal/broadcom/bcm2835_thermal.c b/drivers/thermal/broadcom/bcm2835_thermal.c
index 0ecf808..e6863c8 100644
--- a/drivers/thermal/broadcom/bcm2835_thermal.c
+++ b/drivers/thermal/broadcom/bcm2835_thermal.c

@@ -245,7 +245,6 @@ static int bcm2835_thermal_probe(struct platform_device *pdev)
 		 */
 		err = tz->ops->get_trip_temp(tz, 0, &trip_temp);
 		if (err < 0) {
-			err = PTR_ERR(tz);
 			dev_err(&pdev->dev,
 				"Not able to read trip_temp: %d\n",
 				err);

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 69d0f43..908a801 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c

@@ -49,40 +49,45 @@
  */
 
 /**
- * struct power_table - frequency to power conversion
+ * struct freq_table - frequency table along with power entries
  * @frequency:	frequency in KHz
  * @power:	power in mW
  *
  * This structure is built when the cooling device registers and helps
- * in translating frequency to power and viceversa.
+ * in translating frequency to power and vice versa.
  */
-struct power_table {
+struct freq_table {
 	u32 frequency;
 	u32 power;
 };
 
 /**
+ * struct time_in_idle - Idle time stats
+ * @time: previous reading of the absolute time that this cpu was idle
+ * @timestamp: wall time of the last invocation of get_cpu_idle_time_us()
+ */
+struct time_in_idle {
+	u64 time;
+	u64 timestamp;
+};
+
+/**
  * struct cpufreq_cooling_device - data for cooling device with cpufreq
  * @id: unique integer value corresponding to each cpufreq_cooling_device
  *	registered.
- * @cool_dev: thermal_cooling_device pointer to keep track of the
- *	registered cooling device.
+ * @last_load: load measured by the latest call to cpufreq_get_requested_power()
  * @cpufreq_state: integer value representing the current state of cpufreq
  *	cooling	devices.
  * @clipped_freq: integer value representing the absolute value of the clipped
  *	frequency.
  * @max_level: maximum cooling level. One less than total number of valid
  *	cpufreq frequencies.
- * @allowed_cpus: all the cpus involved for this cpufreq_cooling_device.
+ * @freq_table: Freq table in descending order of frequencies
+ * @cdev: thermal_cooling_device pointer to keep track of the
+ *	registered cooling device.
+ * @policy: cpufreq policy.
  * @node: list_head to link all cpufreq_cooling_device together.
- * @last_load: load measured by the latest call to cpufreq_get_requested_power()
- * @time_in_idle: previous reading of the absolute time that this cpu was idle
- * @time_in_idle_timestamp: wall time of the last invocation of
- *	get_cpu_idle_time_us()
- * @dyn_power_table: array of struct power_table for frequency to power
- *	conversion, sorted in ascending order.
- * @dyn_power_table_entries: number of entries in the @dyn_power_table array
- * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered
+ * @idle_time: idle time stats
  * @plat_get_static_power: callback to calculate the static power
  *
  * This structure is required for keeping information of each registered
@@ -90,81 +95,45 @@ struct power_table {
  */
 struct cpufreq_cooling_device {
 	int id;
-	struct thermal_cooling_device *cool_dev;
+	u32 last_load;
 	unsigned int cpufreq_state;
 	unsigned int clipped_freq;
 	unsigned int max_level;
-	unsigned int *freq_table;	/* In descending order */
-	struct cpumask allowed_cpus;
+	struct freq_table *freq_table;	/* In descending order */
+	struct thermal_cooling_device *cdev;
+	struct cpufreq_policy *policy;
 	struct list_head node;
-	u32 last_load;
-	u64 *time_in_idle;
-	u64 *time_in_idle_timestamp;
-	struct power_table *dyn_power_table;
-	int dyn_power_table_entries;
-	struct device *cpu_dev;
+	struct time_in_idle *idle_time;
 	get_static_t plat_get_static_power;
 };
-static DEFINE_IDA(cpufreq_ida);
 
+static DEFINE_IDA(cpufreq_ida);
 static DEFINE_MUTEX(cooling_list_lock);
-static LIST_HEAD(cpufreq_dev_list);
+static LIST_HEAD(cpufreq_cdev_list);
 
 /* Below code defines functions to be used for cpufreq as cooling device */
 
 /**
  * get_level: Find the level for a particular frequency
- * @cpufreq_dev: cpufreq_dev for which the property is required
+ * @cpufreq_cdev: cpufreq_cdev for which the property is required
  * @freq: Frequency
  *
- * Return: level on success, THERMAL_CSTATE_INVALID on error.
+ * Return: level corresponding to the frequency.
  */
-static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev,
+static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
 			       unsigned int freq)
 {
+	struct freq_table *freq_table = cpufreq_cdev->freq_table;
 	unsigned long level;
 
-	for (level = 0; level <= cpufreq_dev->max_level; level++) {
-		if (freq == cpufreq_dev->freq_table[level])
-			return level;
-
-		if (freq > cpufreq_dev->freq_table[level])
+	for (level = 1; level <= cpufreq_cdev->max_level; level++)
+		if (freq > freq_table[level].frequency)
 			break;
-	}
 
-	return THERMAL_CSTATE_INVALID;
+	return level - 1;
 }
 
 /**
- * cpufreq_cooling_get_level - for a given cpu, return the cooling level.
- * @cpu: cpu for which the level is required
- * @freq: the frequency of interest
- *
- * This function will match the cooling level corresponding to the
- * requested @freq and return it.
- *
- * Return: The matched cooling level on success or THERMAL_CSTATE_INVALID
- * otherwise.
- */
-unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
-{
-	struct cpufreq_cooling_device *cpufreq_dev;
-
-	mutex_lock(&cooling_list_lock);
-	list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
-		if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) {
-			mutex_unlock(&cooling_list_lock);
-			return get_level(cpufreq_dev, freq);
-		}
-	}
-	mutex_unlock(&cooling_list_lock);
-
-	pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu);
-	return THERMAL_CSTATE_INVALID;
-}
-EXPORT_SYMBOL_GPL(cpufreq_cooling_get_level);
-
-/**
  * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
  * @nb:	struct notifier_block * with callback info.
  * @event: value showing cpufreq event for which this function invoked.
@@ -181,14 +150,18 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
 {
 	struct cpufreq_policy *policy = data;
 	unsigned long clipped_freq;
-	struct cpufreq_cooling_device *cpufreq_dev;
+	struct cpufreq_cooling_device *cpufreq_cdev;
 
 	if (event != CPUFREQ_ADJUST)
 		return NOTIFY_DONE;
 
 	mutex_lock(&cooling_list_lock);
-	list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
-		if (!cpumask_test_cpu(policy->cpu, &cpufreq_dev->allowed_cpus))
+	list_for_each_entry(cpufreq_cdev, &cpufreq_cdev_list, node) {
+		/*
+		 * A new copy of the policy is sent to the notifier, so the
+		 * two policies can't be compared directly.
+		 */
+		if (policy->cpu != cpufreq_cdev->policy->cpu)
 			continue;
 
 		/*
@@ -202,7 +175,7 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
 		 * But, if clipped_freq is greater than policy->max, we don't
 		 * need to do anything.
 		 */
-		clipped_freq = cpufreq_dev->clipped_freq;
+		clipped_freq = cpufreq_cdev->clipped_freq;
 
 		if (policy->max > clipped_freq)
 			cpufreq_verify_within_limits(policy, 0, clipped_freq);
@@ -214,63 +187,63 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
 }
 
 /**
- * build_dyn_power_table() - create a dynamic power to frequency table
- * @cpufreq_device:	the cpufreq cooling device in which to store the table
+ * update_freq_table() - Update the freq table with power numbers
+ * @cpufreq_cdev:	the cpufreq cooling device in which to update the table
  * @capacitance: dynamic power coefficient for these cpus
  *
- * Build a dynamic power to frequency table for this cpu and store it
- * in @cpufreq_device.  This table will be used in cpu_power_to_freq() and
- * cpu_freq_to_power() to convert between power and frequency
- * efficiently.  Power is stored in mW, frequency in KHz.  The
- * resulting table is in ascending order.
+ * Update the freq table with power numbers.  This table will be used in
+ * cpu_power_to_freq() and cpu_freq_to_power() to convert between power and
+ * frequency efficiently.  Power is stored in mW, frequency in KHz.  The
+ * resulting table is in descending order.
  *
  * Return: 0 on success, -EINVAL if there are no OPPs for any CPUs,
- * -ENOMEM if we run out of memory or -EAGAIN if an OPP was
- * added/enabled while the function was executing.
+ * or -ENODEV if there is no cpu device for the policy's cpu.
  */
-static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
-				 u32 capacitance)
+static int update_freq_table(struct cpufreq_cooling_device *cpufreq_cdev,
+			     u32 capacitance)
 {
-	struct power_table *power_table;
+	struct freq_table *freq_table = cpufreq_cdev->freq_table;
 	struct dev_pm_opp *opp;
 	struct device *dev = NULL;
-	int num_opps = 0, cpu, i, ret = 0;
-	unsigned long freq;
+	int num_opps = 0, cpu = cpufreq_cdev->policy->cpu, i;
 
-	for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
-		dev = get_cpu_device(cpu);
-		if (!dev) {
-			dev_warn(&cpufreq_device->cool_dev->device,
-				 "No cpu device for cpu %d\n", cpu);
-			continue;
-		}
-
-		num_opps = dev_pm_opp_get_opp_count(dev);
-		if (num_opps > 0)
-			break;
-		else if (num_opps < 0)
-			return num_opps;
+	dev = get_cpu_device(cpu);
+	if (unlikely(!dev)) {
+		dev_warn(&cpufreq_cdev->cdev->device,
+			 "No cpu device for cpu %d\n", cpu);
+		return -ENODEV;
 	}
 
-	if (num_opps == 0)
+	num_opps = dev_pm_opp_get_opp_count(dev);
+	if (num_opps < 0)
+		return num_opps;
+
+	/*
+	 * The cpufreq table is also built from the OPP table and so the count
+	 * should match.
+	 */
+	if (num_opps != cpufreq_cdev->max_level + 1) {
+		dev_warn(dev, "Number of OPPs not matching with max_levels\n");
 		return -EINVAL;
+	}
 
-	power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL);
-	if (!power_table)
-		return -ENOMEM;
-
-	for (freq = 0, i = 0;
-	     opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp);
-	     freq++, i++) {
-		u32 freq_mhz, voltage_mv;
+	for (i = 0; i <= cpufreq_cdev->max_level; i++) {
+		unsigned long freq = freq_table[i].frequency * 1000;
+		u32 freq_mhz = freq_table[i].frequency / 1000;
 		u64 power;
+		u32 voltage_mv;
 
-		if (i >= num_opps) {
-			ret = -EAGAIN;
-			goto free_power_table;
+		/*
+		 * Find ceil frequency as 'freq' may be slightly lower than OPP
+		 * freq due to truncation while converting to kHz.
+		 */
+		opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+		if (IS_ERR(opp)) {
+			dev_err(dev, "failed to get opp for %lu frequency\n",
+				freq);
+			return -EINVAL;
 		}
 
-		freq_mhz = freq / 1000000;
 		voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
 		dev_pm_opp_put(opp);
 
@@ -281,89 +254,73 @@ static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device,
 		power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
 		do_div(power, 1000000000);
 
-		/* frequency is stored in power_table in KHz */
-		power_table[i].frequency = freq / 1000;
-
 		/* power is stored in mW */
-		power_table[i].power = power;
+		freq_table[i].power = power;
 	}
 
-	if (i != num_opps) {
-		ret = PTR_ERR(opp);
-		goto free_power_table;
-	}
-
-	cpufreq_device->cpu_dev = dev;
-	cpufreq_device->dyn_power_table = power_table;
-	cpufreq_device->dyn_power_table_entries = i;
-
 	return 0;
-
-free_power_table:
-	kfree(power_table);
-
-	return ret;
 }
 
-static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device,
+static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
 			     u32 freq)
 {
 	int i;
-	struct power_table *pt = cpufreq_device->dyn_power_table;
+	struct freq_table *freq_table = cpufreq_cdev->freq_table;
 
-	for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
-		if (freq < pt[i].frequency)
+	for (i = 1; i <= cpufreq_cdev->max_level; i++)
+		if (freq > freq_table[i].frequency)
 			break;
 
-	return pt[i - 1].power;
+	return freq_table[i - 1].power;
 }
 
-static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device,
+static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
 			     u32 power)
 {
 	int i;
-	struct power_table *pt = cpufreq_device->dyn_power_table;
+	struct freq_table *freq_table = cpufreq_cdev->freq_table;
 
-	for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++)
-		if (power < pt[i].power)
+	for (i = 1; i <= cpufreq_cdev->max_level; i++)
+		if (power > freq_table[i].power)
 			break;
 
-	return pt[i - 1].frequency;
+	return freq_table[i - 1].frequency;
 }
 
 /**
  * get_load() - get load for a cpu since last updated
- * @cpufreq_device:	&struct cpufreq_cooling_device for this cpu
+ * @cpufreq_cdev:	&struct cpufreq_cooling_device for this cpu
  * @cpu:	cpu number
- * @cpu_idx:	index of the cpu in cpufreq_device->allowed_cpus
+ * @cpu_idx:	index of the cpu in cpufreq_cdev->idle_time
  *
  * Return: The average load of cpu @cpu in percentage since this
  * function was last called.
  */
-static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu,
+static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu,
 		    int cpu_idx)
 {
 	u32 load;
 	u64 now, now_idle, delta_time, delta_idle;
+	struct time_in_idle *idle_time = &cpufreq_cdev->idle_time[cpu_idx];
 
 	now_idle = get_cpu_idle_time(cpu, &now, 0);
-	delta_idle = now_idle - cpufreq_device->time_in_idle[cpu_idx];
-	delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu_idx];
+	delta_idle = now_idle - idle_time->time;
+	delta_time = now - idle_time->timestamp;
 
 	if (delta_time <= delta_idle)
 		load = 0;
 	else
 		load = div64_u64(100 * (delta_time - delta_idle), delta_time);
 
-	cpufreq_device->time_in_idle[cpu_idx] = now_idle;
-	cpufreq_device->time_in_idle_timestamp[cpu_idx] = now;
+	idle_time->time = now_idle;
+	idle_time->timestamp = now;
 
 	return load;
 }
 
 /**
  * get_static_power() - calculate the static power consumed by the cpus
- * @cpufreq_device:	struct &cpufreq_cooling_device for this cpu cdev
+ * @cpufreq_cdev:	struct &cpufreq_cooling_device for this cpu cdev
  * @tz:		thermal zone device in which we're operating
  * @freq:	frequency in KHz
  * @power:	pointer in which to store the calculated static power
@@ -376,26 +333,28 @@ static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu,
  *
  * Return: 0 on success, -E* on failure.
  */
-static int get_static_power(struct cpufreq_cooling_device *cpufreq_device,
+static int get_static_power(struct cpufreq_cooling_device *cpufreq_cdev,
 			    struct thermal_zone_device *tz, unsigned long freq,
 			    u32 *power)
 {
 	struct dev_pm_opp *opp;
 	unsigned long voltage;
-	struct cpumask *cpumask = &cpufreq_device->allowed_cpus;
+	struct cpufreq_policy *policy = cpufreq_cdev->policy;
+	struct cpumask *cpumask = policy->related_cpus;
 	unsigned long freq_hz = freq * 1000;
+	struct device *dev;
 
-	if (!cpufreq_device->plat_get_static_power ||
-	    !cpufreq_device->cpu_dev) {
+	if (!cpufreq_cdev->plat_get_static_power) {
 		*power = 0;
 		return 0;
 	}
 
-	opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz,
-					 true);
+	dev = get_cpu_device(policy->cpu);
+	WARN_ON(!dev);
+
+	opp = dev_pm_opp_find_freq_exact(dev, freq_hz, true);
 	if (IS_ERR(opp)) {
-		dev_warn_ratelimited(cpufreq_device->cpu_dev,
-				     "Failed to find OPP for frequency %lu: %ld\n",
+		dev_warn_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n",
 				     freq_hz, PTR_ERR(opp));
 		return -EINVAL;
 	}
@@ -404,31 +363,30 @@ static int get_static_power(struct cpufreq_cooling_device *cpufreq_device,
 	dev_pm_opp_put(opp);
 
 	if (voltage == 0) {
-		dev_err_ratelimited(cpufreq_device->cpu_dev,
-				    "Failed to get voltage for frequency %lu\n",
+		dev_err_ratelimited(dev, "Failed to get voltage for frequency %lu\n",
 				    freq_hz);
 		return -EINVAL;
 	}
 
-	return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay,
-						     voltage, power);
+	return cpufreq_cdev->plat_get_static_power(cpumask, tz->passive_delay,
+						  voltage, power);
 }
 
 /**
  * get_dynamic_power() - calculate the dynamic power
- * @cpufreq_device:	&cpufreq_cooling_device for this cdev
+ * @cpufreq_cdev:	&cpufreq_cooling_device for this cdev
  * @freq:	current frequency
  *
  * Return: the dynamic power consumed by the cpus described by
- * @cpufreq_device.
+ * @cpufreq_cdev.
  */
-static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device,
+static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_cdev,
 			     unsigned long freq)
 {
 	u32 raw_cpu_power;
 
-	raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq);
-	return (raw_cpu_power * cpufreq_device->last_load) / 100;
+	raw_cpu_power = cpu_freq_to_power(cpufreq_cdev, freq);
+	return (raw_cpu_power * cpufreq_cdev->last_load) / 100;
 }
 
 /* cpufreq cooling device callback functions are defined below */
@@ -446,9 +404,9 @@ static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device,
 static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
 				 unsigned long *state)
 {
-	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
 
-	*state = cpufreq_device->max_level;
+	*state = cpufreq_cdev->max_level;
 	return 0;
 }
 
@@ -465,9 +423,9 @@ static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
 static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
 				 unsigned long *state)
 {
-	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
 
-	*state = cpufreq_device->cpufreq_state;
+	*state = cpufreq_cdev->cpufreq_state;
 
 	return 0;
 }
@@ -485,23 +443,22 @@ static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
 static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
 				 unsigned long state)
 {
-	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
-	unsigned int cpu = cpumask_any(&cpufreq_device->allowed_cpus);
+	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
 	unsigned int clip_freq;
 
 	/* Request state should be less than max_level */
-	if (WARN_ON(state > cpufreq_device->max_level))
+	if (WARN_ON(state > cpufreq_cdev->max_level))
 		return -EINVAL;
 
 	/* Check if the old cooling action is same as new cooling action */
-	if (cpufreq_device->cpufreq_state == state)
+	if (cpufreq_cdev->cpufreq_state == state)
 		return 0;
 
-	clip_freq = cpufreq_device->freq_table[state];
-	cpufreq_device->cpufreq_state = state;
-	cpufreq_device->clipped_freq = clip_freq;
+	clip_freq = cpufreq_cdev->freq_table[state].frequency;
+	cpufreq_cdev->cpufreq_state = state;
+	cpufreq_cdev->clipped_freq = clip_freq;
 
-	cpufreq_update_policy(cpu);
+	cpufreq_update_policy(cpufreq_cdev->policy->cpu);
 
 	return 0;
 }
@@ -536,33 +493,23 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
 	unsigned long freq;
 	int i = 0, cpu, ret;
 	u32 static_power, dynamic_power, total_load = 0;
-	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
+	struct cpufreq_policy *policy = cpufreq_cdev->policy;
 	u32 *load_cpu = NULL;
 
-	cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
-
-	/*
-	 * All the CPUs are offline, thus the requested power by
-	 * the cdev is 0
-	 */
-	if (cpu >= nr_cpu_ids) {
-		*power = 0;
-		return 0;
-	}
-
-	freq = cpufreq_quick_get(cpu);
+	freq = cpufreq_quick_get(policy->cpu);
 
 	if (trace_thermal_power_cpu_get_power_enabled()) {
-		u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus);
+		u32 ncpus = cpumask_weight(policy->related_cpus);
 
 		load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL);
 	}
 
-	for_each_cpu(cpu, &cpufreq_device->allowed_cpus) {
+	for_each_cpu(cpu, policy->related_cpus) {
 		u32 load;
 
 		if (cpu_online(cpu))
-			load = get_load(cpufreq_device, cpu, i);
+			load = get_load(cpufreq_cdev, cpu, i);
 		else
 			load = 0;
 
@@ -573,19 +520,19 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
 		i++;
 	}
 
-	cpufreq_device->last_load = total_load;
+	cpufreq_cdev->last_load = total_load;
 
-	dynamic_power = get_dynamic_power(cpufreq_device, freq);
-	ret = get_static_power(cpufreq_device, tz, freq, &static_power);
+	dynamic_power = get_dynamic_power(cpufreq_cdev, freq);
+	ret = get_static_power(cpufreq_cdev, tz, freq, &static_power);
 	if (ret) {
 		kfree(load_cpu);
 		return ret;
 	}
 
 	if (load_cpu) {
-		trace_thermal_power_cpu_get_power(
-			&cpufreq_device->allowed_cpus,
-			freq, load_cpu, i, dynamic_power, static_power);
+		trace_thermal_power_cpu_get_power(policy->related_cpus, freq,
+						  load_cpu, i, dynamic_power,
+						  static_power);
 
 		kfree(load_cpu);
 	}
@@ -614,38 +561,23 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,
 			       unsigned long state, u32 *power)
 {
 	unsigned int freq, num_cpus;
-	cpumask_var_t cpumask;
 	u32 static_power, dynamic_power;
 	int ret;
-	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
 
-	if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
-		return -ENOMEM;
+	/* Requested state must not exceed max_level */
+	if (WARN_ON(state > cpufreq_cdev->max_level))
+		return -EINVAL;
 
-	cpumask_and(cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask);
-	num_cpus = cpumask_weight(cpumask);
+	num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);
 
-	/* None of our cpus are online, so no power */
-	if (num_cpus == 0) {
-		*power = 0;
-		ret = 0;
-		goto out;
-	}
-
-	freq = cpufreq_device->freq_table[state];
-	if (!freq) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus;
-	ret = get_static_power(cpufreq_device, tz, freq, &static_power);
+	freq = cpufreq_cdev->freq_table[state].frequency;
+	dynamic_power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;
+	ret = get_static_power(cpufreq_cdev, tz, freq, &static_power);
 	if (ret)
-		goto out;
+		return ret;
 
 	*power = static_power + dynamic_power;
-out:
-	free_cpumask_var(cpumask);
 	return ret;
 }
 
@@ -673,39 +605,27 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev,
 			       struct thermal_zone_device *tz, u32 power,
 			       unsigned long *state)
 {
-	unsigned int cpu, cur_freq, target_freq;
+	unsigned int cur_freq, target_freq;
 	int ret;
 	s32 dyn_power;
 	u32 last_load, normalised_power, static_power;
-	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
+	struct cpufreq_policy *policy = cpufreq_cdev->policy;
 
-	cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask);
-
-	/* None of our cpus are online */
-	if (cpu >= nr_cpu_ids)
-		return -ENODEV;
-
-	cur_freq = cpufreq_quick_get(cpu);
-	ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power);
+	cur_freq = cpufreq_quick_get(policy->cpu);
+	ret = get_static_power(cpufreq_cdev, tz, cur_freq, &static_power);
 	if (ret)
 		return ret;
 
 	dyn_power = power - static_power;
 	dyn_power = dyn_power > 0 ? dyn_power : 0;
-	last_load = cpufreq_device->last_load ?: 1;
+	last_load = cpufreq_cdev->last_load ?: 1;
 	normalised_power = (dyn_power * 100) / last_load;
-	target_freq = cpu_power_to_freq(cpufreq_device, normalised_power);
+	target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power);
 
-	*state = cpufreq_cooling_get_level(cpu, target_freq);
-	if (*state == THERMAL_CSTATE_INVALID) {
-		dev_err_ratelimited(&cdev->device,
-				    "Failed to convert %dKHz for cpu %d into a cdev state\n",
-				    target_freq, cpu);
-		return -EINVAL;
-	}
-
-	trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus,
-				      target_freq, *state, power);
+	*state = get_level(cpufreq_cdev, target_freq);
+	trace_thermal_power_cpu_limit(policy->related_cpus, target_freq, *state,
+				      power);
 	return 0;
 }
 
@@ -748,7 +668,7 @@ static unsigned int find_next_max(struct cpufreq_frequency_table *table,
 /**
  * __cpufreq_cooling_register - helper function to create cpufreq cooling device
  * @np: a valid struct device_node to the cooling device device tree node
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ * @policy: cpufreq policy
  * Normally this should be same as cpufreq policy->related_cpus.
  * @capacitance: dynamic power coefficient for these cpus
  * @plat_static_func: function to calculate the static power consumed by these
@@ -764,102 +684,68 @@ static unsigned int find_next_max(struct cpufreq_frequency_table *table,
  */
 static struct thermal_cooling_device *
 __cpufreq_cooling_register(struct device_node *np,
-			const struct cpumask *clip_cpus, u32 capacitance,
+			struct cpufreq_policy *policy, u32 capacitance,
 			get_static_t plat_static_func)
 {
-	struct cpufreq_policy *policy;
-	struct thermal_cooling_device *cool_dev;
-	struct cpufreq_cooling_device *cpufreq_dev;
+	struct thermal_cooling_device *cdev;
+	struct cpufreq_cooling_device *cpufreq_cdev;
 	char dev_name[THERMAL_NAME_LENGTH];
-	struct cpufreq_frequency_table *pos, *table;
-	cpumask_var_t temp_mask;
 	unsigned int freq, i, num_cpus;
 	int ret;
 	struct thermal_cooling_device_ops *cooling_ops;
 	bool first;
 
-	if (!alloc_cpumask_var(&temp_mask, GFP_KERNEL))
+	if (IS_ERR_OR_NULL(policy)) {
+		pr_err("%s: cpufreq policy isn't valid: %p", __func__, policy);
+		return ERR_PTR(-EINVAL);
+	}
+
+	i = cpufreq_table_count_valid_entries(policy);
+	if (!i) {
+		pr_debug("%s: CPUFreq table not found or has no valid entries\n",
+			 __func__);
+		return ERR_PTR(-ENODEV);
+	}
+
+	cpufreq_cdev = kzalloc(sizeof(*cpufreq_cdev), GFP_KERNEL);
+	if (!cpufreq_cdev)
 		return ERR_PTR(-ENOMEM);
 
-	cpumask_and(temp_mask, clip_cpus, cpu_online_mask);
-	policy = cpufreq_cpu_get(cpumask_first(temp_mask));
-	if (!policy) {
-		pr_debug("%s: CPUFreq policy not found\n", __func__);
-		cool_dev = ERR_PTR(-EPROBE_DEFER);
-		goto free_cpumask;
-	}
-
-	table = policy->freq_table;
-	if (!table) {
-		pr_debug("%s: CPUFreq table not found\n", __func__);
-		cool_dev = ERR_PTR(-ENODEV);
-		goto put_policy;
-	}
-
-	cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL);
-	if (!cpufreq_dev) {
-		cool_dev = ERR_PTR(-ENOMEM);
-		goto put_policy;
-	}
-
-	num_cpus = cpumask_weight(clip_cpus);
-	cpufreq_dev->time_in_idle = kcalloc(num_cpus,
-					    sizeof(*cpufreq_dev->time_in_idle),
-					    GFP_KERNEL);
-	if (!cpufreq_dev->time_in_idle) {
-		cool_dev = ERR_PTR(-ENOMEM);
+	cpufreq_cdev->policy = policy;
+	num_cpus = cpumask_weight(policy->related_cpus);
+	cpufreq_cdev->idle_time = kcalloc(num_cpus,
+					 sizeof(*cpufreq_cdev->idle_time),
+					 GFP_KERNEL);
+	if (!cpufreq_cdev->idle_time) {
+		cdev = ERR_PTR(-ENOMEM);
 		goto free_cdev;
 	}
 
-	cpufreq_dev->time_in_idle_timestamp =
-		kcalloc(num_cpus, sizeof(*cpufreq_dev->time_in_idle_timestamp),
-			GFP_KERNEL);
-	if (!cpufreq_dev->time_in_idle_timestamp) {
-		cool_dev = ERR_PTR(-ENOMEM);
-		goto free_time_in_idle;
-	}
-
-	/* Find max levels */
-	cpufreq_for_each_valid_entry(pos, table)
-		cpufreq_dev->max_level++;
-
-	cpufreq_dev->freq_table = kmalloc(sizeof(*cpufreq_dev->freq_table) *
-					  cpufreq_dev->max_level, GFP_KERNEL);
-	if (!cpufreq_dev->freq_table) {
-		cool_dev = ERR_PTR(-ENOMEM);
-		goto free_time_in_idle_timestamp;
-	}
-
 	/* max_level is an index, not a counter */
-	cpufreq_dev->max_level--;
+	cpufreq_cdev->max_level = i - 1;
 
-	cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);
-
-	if (capacitance) {
-		cpufreq_dev->plat_get_static_power = plat_static_func;
-
-		ret = build_dyn_power_table(cpufreq_dev, capacitance);
-		if (ret) {
-			cool_dev = ERR_PTR(ret);
-			goto free_table;
-		}
-
-		cooling_ops = &cpufreq_power_cooling_ops;
-	} else {
-		cooling_ops = &cpufreq_cooling_ops;
+	cpufreq_cdev->freq_table = kmalloc_array(i,
+					sizeof(*cpufreq_cdev->freq_table),
+					GFP_KERNEL);
+	if (!cpufreq_cdev->freq_table) {
+		cdev = ERR_PTR(-ENOMEM);
+		goto free_idle_time;
 	}
 
 	ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL);
 	if (ret < 0) {
-		cool_dev = ERR_PTR(ret);
-		goto free_power_table;
+		cdev = ERR_PTR(ret);
+		goto free_table;
 	}
-	cpufreq_dev->id = ret;
+	cpufreq_cdev->id = ret;
+
+	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
+		 cpufreq_cdev->id);
 
 	/* Fill freq-table in descending order of frequencies */
-	for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) {
-		freq = find_next_max(table, freq);
-		cpufreq_dev->freq_table[i] = freq;
+	for (i = 0, freq = -1; i <= cpufreq_cdev->max_level; i++) {
+		freq = find_next_max(policy->freq_table, freq);
+		cpufreq_cdev->freq_table[i].frequency = freq;
 
 		/* Warn for duplicate entries */
 		if (!freq)
@@ -868,51 +754,54 @@ __cpufreq_cooling_register(struct device_node *np,
 			pr_debug("%s: freq:%u KHz\n", __func__, freq);
 	}
 
-	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
-		 cpufreq_dev->id);
+	if (capacitance) {
+		cpufreq_cdev->plat_get_static_power = plat_static_func;
 
-	cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
-						      cooling_ops);
-	if (IS_ERR(cool_dev))
+		ret = update_freq_table(cpufreq_cdev, capacitance);
+		if (ret) {
+			cdev = ERR_PTR(ret);
+			goto remove_ida;
+		}
+
+		cooling_ops = &cpufreq_power_cooling_ops;
+	} else {
+		cooling_ops = &cpufreq_cooling_ops;
+	}
+
+	cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev,
+						  cooling_ops);
+	if (IS_ERR(cdev))
 		goto remove_ida;
 
-	cpufreq_dev->clipped_freq = cpufreq_dev->freq_table[0];
-	cpufreq_dev->cool_dev = cool_dev;
+	cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0].frequency;
+	cpufreq_cdev->cdev = cdev;
 
 	mutex_lock(&cooling_list_lock);
 	/* Register the notifier for first cpufreq cooling device */
-	first = list_empty(&cpufreq_dev_list);
-	list_add(&cpufreq_dev->node, &cpufreq_dev_list);
+	first = list_empty(&cpufreq_cdev_list);
+	list_add(&cpufreq_cdev->node, &cpufreq_cdev_list);
 	mutex_unlock(&cooling_list_lock);
 
 	if (first)
 		cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
 					  CPUFREQ_POLICY_NOTIFIER);
 
-	goto put_policy;
+	return cdev;
 
 remove_ida:
-	ida_simple_remove(&cpufreq_ida, cpufreq_dev->id);
-free_power_table:
-	kfree(cpufreq_dev->dyn_power_table);
+	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
 free_table:
-	kfree(cpufreq_dev->freq_table);
-free_time_in_idle_timestamp:
-	kfree(cpufreq_dev->time_in_idle_timestamp);
-free_time_in_idle:
-	kfree(cpufreq_dev->time_in_idle);
+	kfree(cpufreq_cdev->freq_table);
+free_idle_time:
+	kfree(cpufreq_cdev->idle_time);
 free_cdev:
-	kfree(cpufreq_dev);
-put_policy:
-	cpufreq_cpu_put(policy);
-free_cpumask:
-	free_cpumask_var(temp_mask);
-	return cool_dev;
+	kfree(cpufreq_cdev);
+	return cdev;
 }
 
 /**
  * cpufreq_cooling_register - function to create cpufreq cooling device.
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ * @policy: cpufreq policy
  *
  * This interface function registers the cpufreq cooling device with the name
  * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
@@ -922,16 +811,16 @@ __cpufreq_cooling_register(struct device_node *np,
  * on failure, it returns a corresponding ERR_PTR().
  */
 struct thermal_cooling_device *
-cpufreq_cooling_register(const struct cpumask *clip_cpus)
+cpufreq_cooling_register(struct cpufreq_policy *policy)
 {
-	return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL);
+	return __cpufreq_cooling_register(NULL, policy, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
 
 /**
  * of_cpufreq_cooling_register - function to create cpufreq cooling device.
  * @np: a valid struct device_node to the cooling device device tree node
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ * @policy: cpufreq policy
  *
  * This interface function registers the cpufreq cooling device with the name
  * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
@@ -943,18 +832,18 @@ EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
  */
 struct thermal_cooling_device *
 of_cpufreq_cooling_register(struct device_node *np,
-			    const struct cpumask *clip_cpus)
+			    struct cpufreq_policy *policy)
 {
 	if (!np)
 		return ERR_PTR(-EINVAL);
 
-	return __cpufreq_cooling_register(np, clip_cpus, 0, NULL);
+	return __cpufreq_cooling_register(np, policy, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
 
 /**
  * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
- * @clip_cpus:	cpumask of cpus where the frequency constraints will happen
+ * @policy:		cpufreq policy
  * @capacitance:	dynamic power coefficient for these cpus
  * @plat_static_func:	function to calculate the static power consumed by these
  *			cpus (optional)
@@ -974,10 +863,10 @@ EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
  * on failure, it returns a corresponding ERR_PTR().
  */
 struct thermal_cooling_device *
-cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance,
+cpufreq_power_cooling_register(struct cpufreq_policy *policy, u32 capacitance,
 			       get_static_t plat_static_func)
 {
-	return __cpufreq_cooling_register(NULL, clip_cpus, capacitance,
+	return __cpufreq_cooling_register(NULL, policy, capacitance,
 				plat_static_func);
 }
 EXPORT_SYMBOL(cpufreq_power_cooling_register);
@@ -985,7 +874,7 @@ EXPORT_SYMBOL(cpufreq_power_cooling_register);
 /**
  * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions
  * @np:	a valid struct device_node to the cooling device device tree node
- * @clip_cpus:	cpumask of cpus where the frequency constraints will happen
+ * @policy: cpufreq policy
  * @capacitance:	dynamic power coefficient for these cpus
  * @plat_static_func:	function to calculate the static power consumed by these
  *			cpus (optional)
@@ -1007,14 +896,14 @@ EXPORT_SYMBOL(cpufreq_power_cooling_register);
  */
 struct thermal_cooling_device *
 of_cpufreq_power_cooling_register(struct device_node *np,
-				  const struct cpumask *clip_cpus,
+				  struct cpufreq_policy *policy,
 				  u32 capacitance,
 				  get_static_t plat_static_func)
 {
 	if (!np)
 		return ERR_PTR(-EINVAL);
 
-	return __cpufreq_cooling_register(np, clip_cpus, capacitance,
+	return __cpufreq_cooling_register(np, policy, capacitance,
 				plat_static_func);
 }
 EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
@@ -1027,30 +916,28 @@ EXPORT_SYMBOL(of_cpufreq_power_cooling_register);
  */
 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
 {
-	struct cpufreq_cooling_device *cpufreq_dev;
+	struct cpufreq_cooling_device *cpufreq_cdev;
 	bool last;
 
 	if (!cdev)
 		return;
 
-	cpufreq_dev = cdev->devdata;
+	cpufreq_cdev = cdev->devdata;
 
 	mutex_lock(&cooling_list_lock);
-	list_del(&cpufreq_dev->node);
+	list_del(&cpufreq_cdev->node);
 	/* Unregister the notifier for the last cpufreq cooling device */
-	last = list_empty(&cpufreq_dev_list);
+	last = list_empty(&cpufreq_cdev_list);
 	mutex_unlock(&cooling_list_lock);
 
 	if (last)
 		cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
 					    CPUFREQ_POLICY_NOTIFIER);
 
-	thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
-	ida_simple_remove(&cpufreq_ida, cpufreq_dev->id);
-	kfree(cpufreq_dev->dyn_power_table);
-	kfree(cpufreq_dev->time_in_idle_timestamp);
-	kfree(cpufreq_dev->time_in_idle);
-	kfree(cpufreq_dev->freq_table);
-	kfree(cpufreq_dev);
+	thermal_cooling_device_unregister(cpufreq_cdev->cdev);
+	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
+	kfree(cpufreq_cdev->idle_time);
+	kfree(cpufreq_cdev->freq_table);
+	kfree(cpufreq_cdev);
 }
 EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);

diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c
index 68bd1b5..d3469fb 100644
--- a/drivers/thermal/fair_share.c
+++ b/drivers/thermal/fair_share.c

@@ -71,6 +71,7 @@ static long get_target_state(struct thermal_zone_device *tz,
 /**
  * fair_share_throttle - throttles devices associated with the given zone
  * @tz - thermal_zone_device
+ * @trip - trip point index
  *
  * Throttling Logic: This uses three parameters to calculate the new
  * throttle state of the cooling devices associated with the given zone.

diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
index f642966..9c3ce34 100644
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c

@@ -397,8 +397,11 @@ static int hisi_thermal_suspend(struct device *dev)
 static int hisi_thermal_resume(struct device *dev)
 {
 	struct hisi_thermal_data *data = dev_get_drvdata(dev);
+	int ret;
 
-	clk_prepare_enable(data->clk);
+	ret = clk_prepare_enable(data->clk);
+	if (ret)
+		return ret;
 
 	data->irq_enabled = true;
 	hisi_thermal_enable_bind_irq_sensor(data);

diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index fb648a4..4798b4b 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c

@@ -8,6 +8,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/cpufreq.h>
 #include <linux/cpu_cooling.h>
 #include <linux/delay.h>
 #include <linux/device.h>
@@ -88,6 +89,7 @@ static struct thermal_soc_data thermal_imx6sx_data = {
 };
 
 struct imx_thermal_data {
+	struct cpufreq_policy *policy;
 	struct thermal_zone_device *tz;
 	struct thermal_cooling_device *cdev;
 	enum thermal_device_mode mode;
@@ -525,13 +527,18 @@ static int imx_thermal_probe(struct platform_device *pdev)
 	regmap_write(map, MISC0 + REG_SET, MISC0_REFTOP_SELBIASOFF);
 	regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN);
 
-	data->cdev = cpufreq_cooling_register(cpu_present_mask);
+	data->policy = cpufreq_cpu_get(0);
+	if (!data->policy) {
+		pr_debug("%s: CPUFreq policy not found\n", __func__);
+		return -EPROBE_DEFER;
+	}
+
+	data->cdev = cpufreq_cooling_register(data->policy);
 	if (IS_ERR(data->cdev)) {
 		ret = PTR_ERR(data->cdev);
-		if (ret != -EPROBE_DEFER)
-			dev_err(&pdev->dev,
-				"failed to register cpufreq cooling device: %d\n",
-				ret);
+		dev_err(&pdev->dev,
+			"failed to register cpufreq cooling device: %d\n", ret);
+		cpufreq_cpu_put(data->policy);
 		return ret;
 	}
 
@@ -542,6 +549,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
 			dev_err(&pdev->dev,
 				"failed to get thermal clk: %d\n", ret);
 		cpufreq_cooling_unregister(data->cdev);
+		cpufreq_cpu_put(data->policy);
 		return ret;
 	}
 
@@ -556,6 +564,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
 	if (ret) {
 		dev_err(&pdev->dev, "failed to enable thermal clk: %d\n", ret);
 		cpufreq_cooling_unregister(data->cdev);
+		cpufreq_cpu_put(data->policy);
 		return ret;
 	}
 
@@ -571,6 +580,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
 			"failed to register thermal zone device %d\n", ret);
 		clk_disable_unprepare(data->thermal_clk);
 		cpufreq_cooling_unregister(data->cdev);
+		cpufreq_cpu_put(data->policy);
 		return ret;
 	}
 
@@ -599,6 +609,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
 		clk_disable_unprepare(data->thermal_clk);
 		thermal_zone_device_unregister(data->tz);
 		cpufreq_cooling_unregister(data->cdev);
+		cpufreq_cpu_put(data->policy);
 		return ret;
 	}
 
@@ -620,6 +631,7 @@ static int imx_thermal_remove(struct platform_device *pdev)
 
 	thermal_zone_device_unregister(data->tz);
 	cpufreq_cooling_unregister(data->cdev);
+	cpufreq_cpu_put(data->policy);
 
 	return 0;
 }
@@ -648,8 +660,11 @@ static int imx_thermal_resume(struct device *dev)
 {
 	struct imx_thermal_data *data = dev_get_drvdata(dev);
 	struct regmap *map = data->tempmon;
+	int ret;
 
-	clk_prepare_enable(data->thermal_clk);
+	ret = clk_prepare_enable(data->thermal_clk);
+	if (ret)
+		return ret;
 	/* Enabled thermal sensor after resume */
 	regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN);
 	regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP);

diff --git a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
index 2c2ec76..51ceb80 100644
--- a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
+++ b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c

@@ -62,8 +62,8 @@ static int acpi_thermal_rel_release(struct inode *inode, struct file *file)
  * acpi_parse_trt - Thermal Relationship Table _TRT for passive cooling
  *
  * @handle: ACPI handle of the device contains _TRT
- * @art_count: the number of valid entries resulted from parsing _TRT
- * @artp: pointer to pointer of array of art entries in parsing result
+ * @trt_count: the number of valid entries resulted from parsing _TRT
+ * @trtp: pointer to pointer of array of _TRT entries in parsing result
  * @create_dev: whether to create platform devices for target and source
  *
  */
@@ -208,7 +208,7 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
 		if (art->target) {
 			result = acpi_bus_get_device(art->target, &adev);
 			if (result)
-				pr_warn("Failed to get source ACPI device\n");
+				pr_warn("Failed to get target ACPI device\n");
 		}
 	}
 

diff --git a/drivers/thermal/int340x_thermal/int3403_thermal.c b/drivers/thermal/int340x_thermal/int3403_thermal.c
index c4890c9..8a7f24d 100644
--- a/drivers/thermal/int340x_thermal/int3403_thermal.c
+++ b/drivers/thermal/int340x_thermal/int3403_thermal.c

@@ -238,8 +238,16 @@ static int int3403_add(struct platform_device *pdev)
 	status = acpi_evaluate_integer(priv->adev->handle, "PTYP",
 				       NULL, &priv->type);
 	if (ACPI_FAILURE(status)) {
-		result = -EINVAL;
-		goto err;
+		unsigned long long tmp;
+
+		status = acpi_evaluate_integer(priv->adev->handle, "_TMP",
+					       NULL, &tmp);
+		if (ACPI_FAILURE(status)) {
+			result = -EINVAL;
+			goto err;
+		} else {
+			priv->type = INT3403_TYPE_SENSOR;
+		}
 	}
 
 	platform_set_drvdata(pdev, priv);

diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index bcef2e7..be95826 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c

@@ -186,8 +186,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
 /**
  * step_wise_throttle - throttles devices associated with the given zone
  * @tz - thermal_zone_device
- * @trip - the trip point
- * @trip_type - type of the trip point
+ * @trip - trip point index
  *
  * Throttling Logic: This uses the trend of the thermal zone to throttle.
  * If the thermal zone is 'heating up' this throttles all the cooling

diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index 02790f6..c211a8e 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c

@@ -28,6 +28,7 @@
 #include <linux/kernel.h>
 #include <linux/workqueue.h>
 #include <linux/thermal.h>
+#include <linux/cpufreq.h>
 #include <linux/cpumask.h>
 #include <linux/cpu_cooling.h>
 #include <linux/of.h>
@@ -37,6 +38,7 @@
 
 /* common data structures */
 struct ti_thermal_data {
+	struct cpufreq_policy *policy;
 	struct thermal_zone_device *ti_thermal;
 	struct thermal_zone_device *pcb_tz;
 	struct thermal_cooling_device *cool_dev;
@@ -247,15 +249,19 @@ int ti_thermal_register_cpu_cooling(struct ti_bandgap *bgp, int id)
 	if (!data)
 		return -EINVAL;
 
+	data->policy = cpufreq_cpu_get(0);
+	if (!data->policy) {
+		pr_debug("%s: CPUFreq policy not found\n", __func__);
+		return -EPROBE_DEFER;
+	}
+
 	/* Register cooling device */
-	data->cool_dev = cpufreq_cooling_register(cpu_present_mask);
+	data->cool_dev = cpufreq_cooling_register(data->policy);
 	if (IS_ERR(data->cool_dev)) {
 		int ret = PTR_ERR(data->cool_dev);
-
-		if (ret != -EPROBE_DEFER)
-			dev_err(bgp->dev,
-				"Failed to register cpu cooling device %d\n",
-				ret);
+		dev_err(bgp->dev, "Failed to register cpu cooling device %d\n",
+			ret);
+		cpufreq_cpu_put(data->policy);
 
 		return ret;
 	}
@@ -270,8 +276,10 @@ int ti_thermal_unregister_cpu_cooling(struct ti_bandgap *bgp, int id)
 
 	data = ti_bandgap_get_sensor_data(bgp, id);
 
-	if (data)
+	if (data) {
 		cpufreq_cooling_unregister(data->cool_dev);
+		cpufreq_cpu_put(data->policy);
+	}
 
 	return 0;
 }

diff --git a/drivers/thermal/user_space.c b/drivers/thermal/user_space.c
index c908150..8e92a06 100644
--- a/drivers/thermal/user_space.c
+++ b/drivers/thermal/user_space.c

@@ -24,12 +24,13 @@
 
 #include <linux/thermal.h>
 #include <linux/slab.h>
+
 #include "thermal_core.h"
 
 /**
  * notify_user_space - Notifies user space about thermal events
  * @tz - thermal_zone_device
- * @trip - Trip point index
+ * @trip - trip point index
  *
  * This function notifies the user space through UEvents.
  */

diff --git a/drivers/tty/serial/ioc3_serial.c b/drivers/tty/serial/ioc3_serial.c
index 2a61dd6..906ee77 100644
--- a/drivers/tty/serial/ioc3_serial.c
+++ b/drivers/tty/serial/ioc3_serial.c

@@ -377,7 +377,7 @@ static struct ioc3_port *get_ioc3_port(struct uart_port *the_port)
  *			called per port from attach...
  * @port: port to initialize
  */
-static int inline port_init(struct ioc3_port *port)
+static inline int port_init(struct ioc3_port *port)
 {
 	uint32_t sio_cr;
 	struct port_hooks *hooks = port->ip_hooks;
@@ -1430,7 +1430,7 @@ static int receive_chars(struct uart_port *the_port)
  * @pending: interrupts to handle
  */
 
-static int inline
+static inline int
 ioc3uart_intr_one(struct ioc3_submodule *is,
 			struct ioc3_driver_data *idd,
 			unsigned int pending)

diff --git a/drivers/tty/serial/ioc4_serial.c b/drivers/tty/serial/ioc4_serial.c
index f96bcf9..43d7d32 100644
--- a/drivers/tty/serial/ioc4_serial.c
+++ b/drivers/tty/serial/ioc4_serial.c

@@ -824,7 +824,7 @@ pending_intrs(struct ioc4_soft *soft, int type)
  *			called per port from attach...
  * @port: port to initialize
  */
-static int inline port_init(struct ioc4_port *port)
+static inline int port_init(struct ioc4_port *port)
 {
 	uint32_t sio_cr;
 	struct hooks *hooks = port->ip_hooks;
@@ -1048,7 +1048,7 @@ static irqreturn_t ioc4_intr(int irq, void *arg)
  *			IOC4 with serial ports in the system.
  * @idd: Master module data for this IOC4
  */
-static int inline ioc4_attach_local(struct ioc4_driver_data *idd)
+static inline int ioc4_attach_local(struct ioc4_driver_data *idd)
 {
 	struct ioc4_port *port;
 	struct ioc4_port *ports[IOC4_NUM_SERIAL_PORTS];

diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c
index 8a069aa..27d7a70 100644
--- a/drivers/usb/serial/safe_serial.c
+++ b/drivers/usb/serial/safe_serial.c

@@ -180,7 +180,7 @@ static const __u16 crc10_table[256] = {
  * Perform a memcpy and calculate fcs using ppp 10bit CRC algorithm. Return
  * new 10 bit FCS.
  */
-static __u16 __inline__ fcs_compute10(unsigned char *sp, int len, __u16 fcs)
+static inline __u16 fcs_compute10(unsigned char *sp, int len, __u16 fcs)
 {
 	for (; len-- > 0; fcs = CRC10_FCS(fcs, *sp++));
 	return fcs;

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 324c52e..063c1ce 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c

@@ -195,11 +195,11 @@ static bool vfio_pci_nointx(struct pci_dev *pdev)
 	switch (pdev->vendor) {
 	case PCI_VENDOR_ID_INTEL:
 		switch (pdev->device) {
-		/* All i40e (XL710/X710) 10/20/40GbE NICs */
+		/* All i40e (XL710/X710/XXV710) 10/20/25/40GbE NICs */
 		case 0x1572:
 		case 0x1574:
 		case 0x1580 ... 0x1581:
-		case 0x1583 ... 0x1589:
+		case 0x1583 ... 0x158b:
 		case 0x37d0 ... 0x37d2:
 			return true;
 		default:

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 561084a..330d505 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c

@@ -382,7 +382,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
 	if (IS_ERR(dev)) {
 		vfio_free_group_minor(minor);
 		vfio_group_unlock_and_free(group);
-		return (struct vfio_group *)dev; /* ERR_PTR */
+		return ERR_CAST(dev);
 	}
 
 	group->minor = minor;
@@ -423,6 +423,34 @@ static void vfio_group_put(struct vfio_group *group)
 	kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
 }
 
+struct vfio_group_put_work {
+	struct work_struct work;
+	struct vfio_group *group;
+};
+
+static void vfio_group_put_bg(struct work_struct *work)
+{
+	struct vfio_group_put_work *do_work;
+
+	do_work = container_of(work, struct vfio_group_put_work, work);
+
+	vfio_group_put(do_work->group);
+	kfree(do_work);
+}
+
+static void vfio_group_schedule_put(struct vfio_group *group)
+{
+	struct vfio_group_put_work *do_work;
+
+	do_work = kmalloc(sizeof(*do_work), GFP_KERNEL);
+	if (WARN_ON(!do_work))
+		return;
+
+	INIT_WORK(&do_work->work, vfio_group_put_bg);
+	do_work->group = group;
+	schedule_work(&do_work->work);
+}
+
 /* Assume group_lock or group reference is held */
 static void vfio_group_get(struct vfio_group *group)
 {
@@ -762,7 +790,14 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb,
 		break;
 	}
 
-	vfio_group_put(group);
+	/*
+	 * If we're the last reference to the group, the group will be
+	 * released, which includes unregistering the iommu group notifier.
+	 * We hold a read-lock on that notifier list, unregistering needs
+	 * a write-lock... deadlock.  Release our reference asynchronously
+	 * to avoid that situation.
+	 */
+	vfio_group_schedule_put(group);
 	return NOTIFY_OK;
 }
 
@@ -1140,15 +1175,11 @@ static long vfio_fops_unl_ioctl(struct file *filep,
 		ret = vfio_ioctl_set_iommu(container, arg);
 		break;
 	default:
-		down_read(&container->group_lock);
-
 		driver = container->iommu_driver;
 		data = container->iommu_data;
 
 		if (driver) /* passthrough all unrecognized ioctls */
 			ret = driver->ops->ioctl(data, cmd, arg);
-
-		up_read(&container->group_lock);
 	}
 
 	return ret;
@@ -1202,15 +1233,11 @@ static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
 	struct vfio_iommu_driver *driver;
 	ssize_t ret = -EINVAL;
 
-	down_read(&container->group_lock);
-
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->read))
 		ret = driver->ops->read(container->iommu_data,
 					buf, count, ppos);
 
-	up_read(&container->group_lock);
-
 	return ret;
 }
 
@@ -1221,15 +1248,11 @@ static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
 	struct vfio_iommu_driver *driver;
 	ssize_t ret = -EINVAL;
 
-	down_read(&container->group_lock);
-
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->write))
 		ret = driver->ops->write(container->iommu_data,
 					 buf, count, ppos);
 
-	up_read(&container->group_lock);
-
 	return ret;
 }
 
@@ -1239,14 +1262,10 @@ static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
 	struct vfio_iommu_driver *driver;
 	int ret = -EINVAL;
 
-	down_read(&container->group_lock);
-
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->mmap))
 		ret = driver->ops->mmap(container->iommu_data, vma);
 
-	up_read(&container->group_lock);
-
 	return ret;
 }
 
@@ -1741,6 +1760,15 @@ void vfio_group_put_external_user(struct vfio_group *group)
 }
 EXPORT_SYMBOL_GPL(vfio_group_put_external_user);
 
+bool vfio_external_group_match_file(struct vfio_group *test_group,
+				    struct file *filep)
+{
+	struct vfio_group *group = filep->private_data;
+
+	return (filep->f_op == &vfio_group_fops) && (group == test_group);
+}
+EXPORT_SYMBOL_GPL(vfio_external_group_match_file);
+
 int vfio_external_user_iommu_id(struct vfio_group *group)
 {
 	return iommu_group_id(group->iommu_group);
@@ -1949,8 +1977,6 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
 		goto err_pin_pages;
 
 	container = group->container;
-	down_read(&container->group_lock);
-
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->pin_pages))
 		ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
@@ -1958,7 +1984,6 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
 	else
 		ret = -ENOTTY;
 
-	up_read(&container->group_lock);
 	vfio_group_try_dissolve_container(group);
 
 err_pin_pages:
@@ -1998,8 +2023,6 @@ int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
 		goto err_unpin_pages;
 
 	container = group->container;
-	down_read(&container->group_lock);
-
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->unpin_pages))
 		ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
@@ -2007,7 +2030,6 @@ int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
 	else
 		ret = -ENOTTY;
 
-	up_read(&container->group_lock);
 	vfio_group_try_dissolve_container(group);
 
 err_unpin_pages:
@@ -2029,8 +2051,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
 		return -EINVAL;
 
 	container = group->container;
-	down_read(&container->group_lock);
-
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->register_notifier))
 		ret = driver->ops->register_notifier(container->iommu_data,
@@ -2038,7 +2058,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
 	else
 		ret = -ENOTTY;
 
-	up_read(&container->group_lock);
 	vfio_group_try_dissolve_container(group);
 
 	return ret;
@@ -2056,8 +2075,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
 		return -EINVAL;
 
 	container = group->container;
-	down_read(&container->group_lock);
-
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->unregister_notifier))
 		ret = driver->ops->unregister_notifier(container->iommu_data,
@@ -2065,7 +2082,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
 	else
 		ret = -ENOTTY;
 
-	up_read(&container->group_lock);
 	vfio_group_try_dissolve_container(group);
 
 	return ret;
@@ -2083,7 +2099,6 @@ static int vfio_register_group_notifier(struct vfio_group *group,
 					unsigned long *events,
 					struct notifier_block *nb)
 {
-	struct vfio_container *container;
 	int ret;
 	bool set_kvm = false;
 
@@ -2101,9 +2116,6 @@ static int vfio_register_group_notifier(struct vfio_group *group,
 	if (ret)
 		return -EINVAL;
 
-	container = group->container;
-	down_read(&container->group_lock);
-
 	ret = blocking_notifier_chain_register(&group->notifier, nb);
 
 	/*
@@ -2114,7 +2126,6 @@ static int vfio_register_group_notifier(struct vfio_group *group,
 		blocking_notifier_call_chain(&group->notifier,
 					VFIO_GROUP_NOTIFY_SET_KVM, group->kvm);
 
-	up_read(&container->group_lock);
 	vfio_group_try_dissolve_container(group);
 
 	return ret;
@@ -2123,19 +2134,14 @@ static int vfio_register_group_notifier(struct vfio_group *group,
 static int vfio_unregister_group_notifier(struct vfio_group *group,
 					 struct notifier_block *nb)
 {
-	struct vfio_container *container;
 	int ret;
 
 	ret = vfio_group_add_container_user(group);
 	if (ret)
 		return -EINVAL;
 
-	container = group->container;
-	down_read(&container->group_lock);
-
 	ret = blocking_notifier_chain_unregister(&group->notifier, nb);
 
-	up_read(&container->group_lock);
 	vfio_group_try_dissolve_container(group);
 
 	return ret;

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index e3d7ea1..06d0448 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c

@@ -897,7 +897,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 	struct sk_buff **queue;
 	int i;
 
-	n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_REPEAT);
+	n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!n)
 		return -ENOMEM;
 	vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index fd6c8b6..046f6d2 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c

@@ -496,14 +496,12 @@ static void vhost_scsi_evt_work(struct vhost_work *work)
 	struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
 					vs_event_work);
 	struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
-	struct vhost_scsi_evt *evt;
+	struct vhost_scsi_evt *evt, *t;
 	struct llist_node *llnode;
 
 	mutex_lock(&vq->mutex);
 	llnode = llist_del_all(&vs->vs_event_list);
-	while (llnode) {
-		evt = llist_entry(llnode, struct vhost_scsi_evt, list);
-		llnode = llist_next(llnode);
+	llist_for_each_entry_safe(evt, t, llnode, list) {
 		vhost_scsi_do_evt_work(vs, evt);
 		vhost_scsi_free_evt(vs, evt);
 	}
@@ -529,10 +527,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
 
 	bitmap_zero(signal, VHOST_SCSI_MAX_VQ);
 	llnode = llist_del_all(&vs->vs_completion_list);
-	while (llnode) {
-		cmd = llist_entry(llnode, struct vhost_scsi_cmd,
-				     tvc_completion_list);
-		llnode = llist_next(llnode);
+	llist_for_each_entry(cmd, llnode, tvc_completion_list) {
 		se_cmd = &cmd->tvc_se_cmd;
 
 		pr_debug("%s tv_cmd %p resid %u status %#02x\n", __func__,
@@ -1404,7 +1399,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
 	struct vhost_virtqueue **vqs;
 	int r = -ENOMEM, i;
 
-	vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+	vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL);
 	if (!vs) {
 		vs = vzalloc(sizeof(*vs));
 		if (!vs)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 3f63e03..c9de9c4 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c

@@ -508,7 +508,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
 	/* This struct is large and allocation could fail, fall back to vmalloc
 	 * if there is no other way.
 	 */
-	vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_REPEAT);
+	vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!vsock)
 		return -ENOMEM;
 

diff --git a/drivers/video/console/mdacon.c b/drivers/video/console/mdacon.c
index ec192a1..d0d427a 100644
--- a/drivers/video/console/mdacon.c
+++ b/drivers/video/console/mdacon.c

@@ -48,7 +48,7 @@ static DEFINE_SPINLOCK(mda_lock);
 
 /* description of the hardware layout */
 
-static unsigned long	mda_vram_base;		/* Base of video memory */
+static u16		*mda_vram_base;		/* Base of video memory */
 static unsigned long	mda_vram_len;		/* Size of video memory */
 static unsigned int	mda_num_columns;	/* Number of text columns */
 static unsigned int	mda_num_lines;		/* Number of text lines */
@@ -205,13 +205,20 @@ static int mda_detect(void)
 
 	/* do a memory check */
 
-	p = (u16 *) mda_vram_base;
-	q = (u16 *) (mda_vram_base + 0x01000);
+	p = mda_vram_base;
+	q = mda_vram_base + 0x01000 / 2;
 
-	p_save = scr_readw(p); q_save = scr_readw(q);
+	p_save = scr_readw(p);
+	q_save = scr_readw(q);
 
-	scr_writew(0xAA55, p); if (scr_readw(p) == 0xAA55) count++;
-	scr_writew(0x55AA, p); if (scr_readw(p) == 0x55AA) count++;
+	scr_writew(0xAA55, p);
+	if (scr_readw(p) == 0xAA55)
+		count++;
+
+	scr_writew(0x55AA, p);
+	if (scr_readw(p) == 0x55AA)
+		count++;
+
 	scr_writew(p_save, p);
 
 	if (count != 2) {
@@ -220,13 +227,18 @@ static int mda_detect(void)
 
 	/* check if we have 4K or 8K */
 
-	scr_writew(0xA55A, q); scr_writew(0x0000, p);
-	if (scr_readw(q) == 0xA55A) count++;
+	scr_writew(0xA55A, q);
+	scr_writew(0x0000, p);
+	if (scr_readw(q) == 0xA55A)
+		count++;
 	
-	scr_writew(0x5AA5, q); scr_writew(0x0000, p);
-	if (scr_readw(q) == 0x5AA5) count++;
+	scr_writew(0x5AA5, q);
+	scr_writew(0x0000, p);
+	if (scr_readw(q) == 0x5AA5)
+		count++;
 
-	scr_writew(p_save, p); scr_writew(q_save, q);
+	scr_writew(p_save, p);
+	scr_writew(q_save, q);
 	
 	if (count == 4) {
 		mda_vram_len = 0x02000;
@@ -240,14 +252,12 @@ static int mda_detect(void)
 	/* Edward: These two mess `tests' mess up my cursor on bootup */
 
 	/* cursor low register */
-	if (! test_mda_b(0x66, 0x0f)) {
+	if (!test_mda_b(0x66, 0x0f))
 		return 0;
-	}
 
 	/* cursor low register */
-	if (! test_mda_b(0x99, 0x0f)) {
+	if (!test_mda_b(0x99, 0x0f))
 		return 0;
-	}
 #endif
 
 	/* See if the card is a Hercules, by checking whether the vsync
@@ -257,25 +267,25 @@ static int mda_detect(void)
 	
 	p_save = q_save = inb_p(mda_status_port) & MDA_STATUS_VSYNC;
 
-	for (count=0; count < 50000 && p_save == q_save; count++) {
+	for (count = 0; count < 50000 && p_save == q_save; count++) {
 		q_save = inb(mda_status_port) & MDA_STATUS_VSYNC;
 		udelay(2);
 	}
 
 	if (p_save != q_save) {
 		switch (inb_p(mda_status_port) & 0x70) {
-			case 0x10:
-				mda_type = TYPE_HERCPLUS;
-				mda_type_name = "HerculesPlus";
-				break;
-			case 0x50:
-				mda_type = TYPE_HERCCOLOR;
-				mda_type_name = "HerculesColor";
-				break;
-			default:
-				mda_type = TYPE_HERC;
-				mda_type_name = "Hercules";
-				break;
+		case 0x10:
+			mda_type = TYPE_HERCPLUS;
+			mda_type_name = "HerculesPlus";
+			break;
+		case 0x50:
+			mda_type = TYPE_HERCCOLOR;
+			mda_type_name = "HerculesColor";
+			break;
+		default:
+			mda_type = TYPE_HERC;
+			mda_type_name = "Hercules";
+			break;
 		}
 	}
 
@@ -313,7 +323,7 @@ static const char *mdacon_startup(void)
 	mda_num_lines   = 25;
 
 	mda_vram_len  = 0x01000;
-	mda_vram_base = VGA_MAP_MEM(0xb0000, mda_vram_len);
+	mda_vram_base = (u16 *)VGA_MAP_MEM(0xb0000, mda_vram_len);
 
 	mda_index_port  = 0x3b4;
 	mda_value_port  = 0x3b5;
@@ -410,17 +420,20 @@ static void mdacon_invert_region(struct vc_data *c, u16 *p, int count)
 	}
 }
 
-#define MDA_ADDR(x,y)  ((u16 *) mda_vram_base + (y)*mda_num_columns + (x))
+static inline u16 *mda_addr(unsigned int x, unsigned int y)
+{
+	return mda_vram_base + y * mda_num_columns + x;
+}
 
 static void mdacon_putc(struct vc_data *c, int ch, int y, int x)
 {
-	scr_writew(mda_convert_attr(ch), MDA_ADDR(x, y));
+	scr_writew(mda_convert_attr(ch), mda_addr(x, y));
 }
 
 static void mdacon_putcs(struct vc_data *c, const unsigned short *s,
 		         int count, int y, int x)
 {
-	u16 *dest = MDA_ADDR(x, y);
+	u16 *dest = mda_addr(x, y);
 
 	for (; count > 0; count--) {
 		scr_writew(mda_convert_attr(scr_readw(s++)), dest++);
@@ -430,7 +443,7 @@ static void mdacon_putcs(struct vc_data *c, const unsigned short *s,
 static void mdacon_clear(struct vc_data *c, int y, int x, 
 			  int height, int width)
 {
-	u16 *dest = MDA_ADDR(x, y);
+	u16 *dest = mda_addr(x, y);
 	u16 eattr = mda_convert_attr(c->vc_video_erase_char);
 
 	if (width <= 0 || height <= 0)
@@ -453,7 +466,7 @@ static int mdacon_blank(struct vc_data *c, int blank, int mode_switch)
 {
 	if (mda_type == TYPE_MDA) {
 		if (blank) 
-			scr_memsetw((void *)mda_vram_base, 
+			scr_memsetw(mda_vram_base,
 				mda_convert_attr(c->vc_video_erase_char),
 				c->vc_screenbuf_size);
 		/* Tell console.c that it has to restore the screen itself */
@@ -502,16 +515,16 @@ static bool mdacon_scroll(struct vc_data *c, unsigned int t, unsigned int b,
 	switch (dir) {
 
 	case SM_UP:
-		scr_memmovew(MDA_ADDR(0,t), MDA_ADDR(0,t+lines),
+		scr_memmovew(mda_addr(0, t), mda_addr(0, t + lines),
 				(b-t-lines)*mda_num_columns*2);
-		scr_memsetw(MDA_ADDR(0,b-lines), eattr,
+		scr_memsetw(mda_addr(0, b - lines), eattr,
 				lines*mda_num_columns*2);
 		break;
 
 	case SM_DOWN:
-		scr_memmovew(MDA_ADDR(0,t+lines), MDA_ADDR(0,t),
+		scr_memmovew(mda_addr(0, t + lines), mda_addr(0, t),
 				(b-t-lines)*mda_num_columns*2);
-		scr_memsetw(MDA_ADDR(0,t), eattr, lines*mda_num_columns*2);
+		scr_memsetw(mda_addr(0, t), eattr, lines*mda_num_columns*2);
 		break;
 	}
 

diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c
index 11026e7..b55fdac 100644
--- a/drivers/video/fbdev/aty/atyfb_base.c
+++ b/drivers/video/fbdev/aty/atyfb_base.c

@@ -802,7 +802,7 @@ static int aty_var_to_crtc(const struct fb_info *info,
 {
 	struct atyfb_par *par = (struct atyfb_par *) info->par;
 	u32 xres, yres, vxres, vyres, xoffset, yoffset, bpp;
-	u32 sync, vmode, vdisplay;
+	u32 sync, vmode;
 	u32 h_total, h_disp, h_sync_strt, h_sync_end, h_sync_dly, h_sync_wid, h_sync_pol;
 	u32 v_total, v_disp, v_sync_strt, v_sync_end, v_sync_wid, v_sync_pol, c_sync;
 	u32 pix_width, dp_pix_width, dp_chain_mask;
@@ -984,12 +984,6 @@ static int aty_var_to_crtc(const struct fb_info *info,
 		v_total <<= 1;
 	}
 
-	vdisplay = yres;
-#ifdef CONFIG_FB_ATY_GENERIC_LCD
-	if ((par->lcd_table != 0) && (crtc->lcd_gen_cntl & LCD_ON))
-		vdisplay  = par->lcd_height;
-#endif
-
 	v_disp--;
 	v_sync_strt--;
 	v_sync_end--;
@@ -1036,7 +1030,7 @@ static int aty_var_to_crtc(const struct fb_info *info,
 		crtc->gen_cntl |= CRTC_INTERLACE_EN;
 #ifdef CONFIG_FB_ATY_GENERIC_LCD
 	if (par->lcd_table != 0) {
-		vdisplay = yres;
+		u32 vdisplay = yres;
 		if (vmode & FB_VMODE_DOUBLE)
 			vdisplay <<= 1;
 		crtc->gen_cntl &= ~(CRTC2_EN | CRTC2_PIX_WIDTH);

diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 5324358..7a42238 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c

@@ -1483,7 +1483,7 @@ __releases(&info->lock)
 	return 0;
 }
 
-#ifdef CONFIG_FB_PROVIDE_GET_FB_UNMAPPED_AREA
+#if defined(CONFIG_FB_PROVIDE_GET_FB_UNMAPPED_AREA) && !defined(CONFIG_MMU)
 unsigned long get_fb_unmapped_area(struct file *filp,
 				   unsigned long addr, unsigned long len,
 				   unsigned long pgoff, unsigned long flags)
@@ -1510,7 +1510,8 @@ static const struct file_operations fb_fops = {
 	.open =		fb_open,
 	.release =	fb_release,
 #if defined(HAVE_ARCH_FB_UNMAPPED_AREA) || \
-    defined(CONFIG_FB_PROVIDE_GET_FB_UNMAPPED_AREA)
+	(defined(CONFIG_FB_PROVIDE_GET_FB_UNMAPPED_AREA) && \
+	 !defined(CONFIG_MMU))
 	.get_unmapped_area = get_fb_unmapped_area,
 #endif
 #ifdef CONFIG_FB_DEFERRED_IO

diff --git a/drivers/video/fbdev/fsl-diu-fb.c b/drivers/video/fbdev/fsl-diu-fb.c
index ca3d6b3..25abbcf 100644
--- a/drivers/video/fbdev/fsl-diu-fb.c
+++ b/drivers/video/fbdev/fsl-diu-fb.c

@@ -388,7 +388,7 @@ struct fsl_diu_data {
 /* Determine the DMA address of a member of the fsl_diu_data structure */
 #define DMA_ADDR(p, f) ((p)->dma_addr + offsetof(struct fsl_diu_data, f))
 
-static struct mfb_info mfb_template[] = {
+static const struct mfb_info mfb_template[] = {
 	{
 		.index = PLANE0,
 		.id = "Panel0",
@@ -1868,7 +1868,7 @@ static int __init fsl_diu_setup(char *options)
 }
 #endif
 
-static struct of_device_id fsl_diu_match[] = {
+static const struct of_device_id fsl_diu_match[] = {
 #ifdef CONFIG_PPC_MPC512x
 	{
 		.compatible = "fsl,mpc5121-diu",

diff --git a/drivers/video/fbdev/intelfb/intelfbdrv.c b/drivers/video/fbdev/intelfb/intelfbdrv.c
index 6b444400..ffc3912 100644
--- a/drivers/video/fbdev/intelfb/intelfbdrv.c
+++ b/drivers/video/fbdev/intelfb/intelfbdrv.c

@@ -907,7 +907,7 @@ static void intelfb_pci_unregister(struct pci_dev *pdev)
  *                       helper functions                      *
  ***************************************************************/
 
-int __inline__ intelfb_var_to_depth(const struct fb_var_screeninfo *var)
+__inline__ int intelfb_var_to_depth(const struct fb_var_screeninfo *var)
 {
 	DBG_MSG("intelfb_var_to_depth: bpp: %d, green.length is %d\n",
 		var->bits_per_pixel, var->green.length);

diff --git a/drivers/video/fbdev/matrox/matroxfb_base.c b/drivers/video/fbdev/matrox/matroxfb_base.c
index 11eb094..f6a0b9a 100644
--- a/drivers/video/fbdev/matrox/matroxfb_base.c
+++ b/drivers/video/fbdev/matrox/matroxfb_base.c

@@ -2001,7 +2001,7 @@ static void matroxfb_register_device(struct matrox_fb_info* minfo) {
 	for (drv = matroxfb_driver_l(matroxfb_driver_list.next);
 	     drv != matroxfb_driver_l(&matroxfb_driver_list);
 	     drv = matroxfb_driver_l(drv->node.next)) {
-		if (drv && drv->probe) {
+		if (drv->probe) {
 			void *p = drv->probe(minfo);
 			if (p) {
 				minfo->drivers_data[i] = p;

diff --git a/drivers/video/fbdev/omap/lcdc.c b/drivers/video/fbdev/omap/lcdc.c
index e3d9b9e..938cba0 100644
--- a/drivers/video/fbdev/omap/lcdc.c
+++ b/drivers/video/fbdev/omap/lcdc.c

@@ -79,12 +79,12 @@ static struct omap_lcd_controller {
 	unsigned long		vram_size;
 } lcdc;
 
-static void inline enable_irqs(int mask)
+static inline void enable_irqs(int mask)
 {
 	lcdc.irq_mask |= mask;
 }
 
-static void inline disable_irqs(int mask)
+static inline void disable_irqs(int mask)
 {
 	lcdc.irq_mask &= ~mask;
 }
@@ -466,7 +466,7 @@ static void calc_ck_div(int is_tft, int pck, int *pck_div)
 	}
 }
 
-static void inline setup_regs(void)
+static inline void setup_regs(void)
 {
 	u32 l;
 	struct lcd_panel *panel = lcdc.fbdev->panel;

diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c
index f4cbfb3..3479a47 100644
--- a/drivers/video/fbdev/omap/omapfb_main.c
+++ b/drivers/video/fbdev/omap/omapfb_main.c

@@ -62,7 +62,7 @@ struct caps_table_struct {
 	const char *name;
 };
 
-static struct caps_table_struct ctrl_caps[] = {
+static const struct caps_table_struct ctrl_caps[] = {
 	{ OMAPFB_CAPS_MANUAL_UPDATE,  "manual update" },
 	{ OMAPFB_CAPS_TEARSYNC,       "tearing synchronization" },
 	{ OMAPFB_CAPS_PLANE_RELOCATE_MEM, "relocate plane memory" },
@@ -74,7 +74,7 @@ static struct caps_table_struct ctrl_caps[] = {
 	{ OMAPFB_CAPS_SET_BACKLIGHT,  "backlight setting" },
 };
 
-static struct caps_table_struct color_caps[] = {
+static const struct caps_table_struct color_caps[] = {
 	{ 1 << OMAPFB_COLOR_RGB565,	"RGB565", },
 	{ 1 << OMAPFB_COLOR_YUV422,	"YUV422", },
 	{ 1 << OMAPFB_COLOR_YUV420,	"YUV420", },
@@ -1384,7 +1384,7 @@ static struct attribute *panel_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group panel_attr_grp = {
+static const struct attribute_group panel_attr_grp = {
 	.name  = "panel",
 	.attrs = panel_attrs,
 };
@@ -1406,7 +1406,7 @@ static struct attribute *ctrl_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group ctrl_attr_grp = {
+static const struct attribute_group ctrl_attr_grp = {
 	.name  = "ctrl",
 	.attrs = ctrl_attrs,
 };

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
index fd2b372d..bef4315 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c

@@ -100,7 +100,7 @@ static void hw_guard_wait(struct panel_drv_data *ddata)
 {
 	unsigned long wait = ddata->hw_guard_end - jiffies;
 
-	if ((long)wait > 0 && wait <= ddata->hw_guard_wait) {
+	if ((long)wait > 0 && time_before_eq(wait, ddata->hw_guard_wait)) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule_timeout(wait);
 	}
@@ -559,7 +559,7 @@ static struct attribute *dsicm_attrs[] = {
 	NULL,
 };
 
-static struct attribute_group dsicm_attr_group = {
+static const struct attribute_group dsicm_attr_group = {
 	.attrs = dsicm_attrs,
 };
 

diff --git a/drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c b/drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c
index 9e2a67f..44b96af 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/manager-sysfs.c

@@ -182,22 +182,16 @@ static ssize_t manager_trans_key_type_show(struct omap_overlay_manager *mgr,
 static ssize_t manager_trans_key_type_store(struct omap_overlay_manager *mgr,
 					    const char *buf, size_t size)
 {
-	enum omap_dss_trans_key_type key_type;
 	struct omap_overlay_manager_info info;
 	int r;
 
-	for (key_type = OMAP_DSS_COLOR_KEY_GFX_DST;
-			key_type < ARRAY_SIZE(trans_key_type_str); key_type++) {
-		if (sysfs_streq(buf, trans_key_type_str[key_type]))
-			break;
-	}
-
-	if (key_type == ARRAY_SIZE(trans_key_type_str))
-		return -EINVAL;
+	r = sysfs_match_string(trans_key_type_str, buf);
+	if (r < 0)
+		return r;
 
 	mgr->get_manager_info(mgr, &info);
 
-	info.trans_key_type = key_type;
+	info.trans_key_type = r;
 
 	r = mgr->set_manager_info(mgr, &info);
 	if (r)

diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c
index b21a89b..c3d49e1 100644
--- a/drivers/video/fbdev/pxafb.c
+++ b/drivers/video/fbdev/pxafb.c

@@ -1436,7 +1436,10 @@ static void pxafb_enable_controller(struct pxafb_info *fbi)
 	pr_debug("reg_lccr3 0x%08x\n", (unsigned int) fbi->reg_lccr3);
 
 	/* enable LCD controller clock */
-	clk_prepare_enable(fbi->clk);
+	if (clk_prepare_enable(fbi->clk)) {
+		pr_err("%s: Failed to prepare clock\n", __func__);
+		return;
+	}
 
 	if (fbi->lccr0 & LCCR0_LCDT)
 		return;

diff --git a/drivers/video/fbdev/sh_mobile_lcdcfb.c b/drivers/video/fbdev/sh_mobile_lcdcfb.c
index 885ee3a..c3a4650 100644
--- a/drivers/video/fbdev/sh_mobile_lcdcfb.c
+++ b/drivers/video/fbdev/sh_mobile_lcdcfb.c

@@ -2301,7 +2301,7 @@ static int sh_mobile_lcdc_check_fb(struct backlight_device *bdev,
 	return (info->bl_dev == bdev);
 }
 
-static struct backlight_ops sh_mobile_lcdc_bl_ops = {
+static const struct backlight_ops sh_mobile_lcdc_bl_ops = {
 	.options	= BL_CORE_SUSPENDRESUME,
 	.update_status	= sh_mobile_lcdc_update_bl,
 	.get_brightness	= sh_mobile_lcdc_get_brightness,

diff --git a/drivers/video/fbdev/uvesafb.c b/drivers/video/fbdev/uvesafb.c
index 98af9e0..dc0e8d9 100644
--- a/drivers/video/fbdev/uvesafb.c
+++ b/drivers/video/fbdev/uvesafb.c

@@ -5,6 +5,9 @@
  *     Loosely based upon the vesafb driver.
  *
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -149,8 +152,8 @@ static int uvesafb_exec(struct uvesafb_ktask *task)
 	 * allowed by connector.
 	 */
 	if (sizeof(*m) + len > CONNECTOR_MAX_MSG_SIZE) {
-		printk(KERN_WARNING "uvesafb: message too long (%d), "
-			"can't execute task\n", (int)(sizeof(*m) + len));
+		pr_warn("message too long (%d), can't execute task\n",
+			(int)(sizeof(*m) + len));
 		return -E2BIG;
 	}
 
@@ -198,10 +201,8 @@ static int uvesafb_exec(struct uvesafb_ktask *task)
 		 */
 		err = uvesafb_helper_start();
 		if (err) {
-			printk(KERN_ERR "uvesafb: failed to execute %s\n",
-					v86d_path);
-			printk(KERN_ERR "uvesafb: make sure that the v86d "
-					"helper is installed and executable\n");
+			pr_err("failed to execute %s\n", v86d_path);
+			pr_err("make sure that the v86d helper is installed and executable\n");
 		} else {
 			v86d_started = 1;
 			err = cn_netlink_send(m, 0, 0, gfp_any());
@@ -375,9 +376,8 @@ static u8 *uvesafb_vbe_state_save(struct uvesafb_par *par)
 	err = uvesafb_exec(task);
 
 	if (err || (task->t.regs.eax & 0xffff) != 0x004f) {
-		printk(KERN_WARNING "uvesafb: VBE get state call "
-				"failed (eax=0x%x, err=%d)\n",
-				task->t.regs.eax, err);
+		pr_warn("VBE get state call failed (eax=0x%x, err=%d)\n",
+			task->t.regs.eax, err);
 		kfree(state);
 		state = NULL;
 	}
@@ -407,9 +407,8 @@ static void uvesafb_vbe_state_restore(struct uvesafb_par *par, u8 *state_buf)
 
 	err = uvesafb_exec(task);
 	if (err || (task->t.regs.eax & 0xffff) != 0x004f)
-		printk(KERN_WARNING "uvesafb: VBE state restore call "
-				"failed (eax=0x%x, err=%d)\n",
-				task->t.regs.eax, err);
+		pr_warn("VBE state restore call failed (eax=0x%x, err=%d)\n",
+			task->t.regs.eax, err);
 
 	uvesafb_free(task);
 }
@@ -427,24 +426,22 @@ static int uvesafb_vbe_getinfo(struct uvesafb_ktask *task,
 
 	err = uvesafb_exec(task);
 	if (err || (task->t.regs.eax & 0xffff) != 0x004f) {
-		printk(KERN_ERR "uvesafb: Getting VBE info block failed "
-				"(eax=0x%x, err=%d)\n", (u32)task->t.regs.eax,
-				err);
+		pr_err("Getting VBE info block failed (eax=0x%x, err=%d)\n",
+		       (u32)task->t.regs.eax, err);
 		return -EINVAL;
 	}
 
 	if (par->vbe_ib.vbe_version < 0x0200) {
-		printk(KERN_ERR "uvesafb: Sorry, pre-VBE 2.0 cards are "
-				"not supported.\n");
+		pr_err("Sorry, pre-VBE 2.0 cards are not supported\n");
 		return -EINVAL;
 	}
 
 	if (!par->vbe_ib.mode_list_ptr) {
-		printk(KERN_ERR "uvesafb: Missing mode list!\n");
+		pr_err("Missing mode list!\n");
 		return -EINVAL;
 	}
 
-	printk(KERN_INFO "uvesafb: ");
+	pr_info("");
 
 	/*
 	 * Convert string pointers and the mode list pointer into
@@ -452,23 +449,24 @@ static int uvesafb_vbe_getinfo(struct uvesafb_ktask *task,
 	 * video adapter and its vendor.
 	 */
 	if (par->vbe_ib.oem_vendor_name_ptr)
-		printk("%s, ",
+		pr_cont("%s, ",
 			((char *)task->buf) + par->vbe_ib.oem_vendor_name_ptr);
 
 	if (par->vbe_ib.oem_product_name_ptr)
-		printk("%s, ",
+		pr_cont("%s, ",
 			((char *)task->buf) + par->vbe_ib.oem_product_name_ptr);
 
 	if (par->vbe_ib.oem_product_rev_ptr)
-		printk("%s, ",
+		pr_cont("%s, ",
 			((char *)task->buf) + par->vbe_ib.oem_product_rev_ptr);
 
 	if (par->vbe_ib.oem_string_ptr)
-		printk("OEM: %s, ",
+		pr_cont("OEM: %s, ",
 			((char *)task->buf) + par->vbe_ib.oem_string_ptr);
 
-	printk("VBE v%d.%d\n", ((par->vbe_ib.vbe_version & 0xff00) >> 8),
-			par->vbe_ib.vbe_version & 0xff);
+	pr_cont("VBE v%d.%d\n",
+		(par->vbe_ib.vbe_version & 0xff00) >> 8,
+		par->vbe_ib.vbe_version & 0xff);
 
 	return 0;
 }
@@ -507,8 +505,7 @@ static int uvesafb_vbe_getmodes(struct uvesafb_ktask *task,
 
 		err = uvesafb_exec(task);
 		if (err || (task->t.regs.eax & 0xffff) != 0x004f) {
-			printk(KERN_WARNING "uvesafb: Getting mode info block "
-				"for mode 0x%x failed (eax=0x%x, err=%d)\n",
+			pr_warn("Getting mode info block for mode 0x%x failed (eax=0x%x, err=%d)\n",
 				*mode, (u32)task->t.regs.eax, err);
 			mode++;
 			par->vbe_modes_cnt--;
@@ -569,23 +566,20 @@ static int uvesafb_vbe_getpmi(struct uvesafb_ktask *task,
 						+ task->t.regs.edi);
 		par->pmi_start = (u8 *)par->pmi_base + par->pmi_base[1];
 		par->pmi_pal = (u8 *)par->pmi_base + par->pmi_base[2];
-		printk(KERN_INFO "uvesafb: protected mode interface info at "
-				 "%04x:%04x\n",
-				 (u16)task->t.regs.es, (u16)task->t.regs.edi);
-		printk(KERN_INFO "uvesafb: pmi: set display start = %p, "
-				 "set palette = %p\n", par->pmi_start,
-				 par->pmi_pal);
+		pr_info("protected mode interface info at %04x:%04x\n",
+			(u16)task->t.regs.es, (u16)task->t.regs.edi);
+		pr_info("pmi: set display start = %p, set palette = %p\n",
+			par->pmi_start, par->pmi_pal);
 
 		if (par->pmi_base[3]) {
-			printk(KERN_INFO "uvesafb: pmi: ports = ");
+			pr_info("pmi: ports =");
 			for (i = par->pmi_base[3]/2;
 					par->pmi_base[i] != 0xffff; i++)
-				printk("%x ", par->pmi_base[i]);
-			printk("\n");
+				pr_cont(" %x", par->pmi_base[i]);
+			pr_cont("\n");
 
 			if (par->pmi_base[i] != 0xffff) {
-				printk(KERN_INFO "uvesafb: can't handle memory"
-						 " requests, pmi disabled\n");
+				pr_info("can't handle memory requests, pmi disabled\n");
 				par->ypan = par->pmi_setpal = 0;
 			}
 		}
@@ -634,17 +628,13 @@ static int uvesafb_vbe_getedid(struct uvesafb_ktask *task, struct fb_info *info)
 		return -EINVAL;
 
 	if ((task->t.regs.ebx & 0x3) == 3) {
-		printk(KERN_INFO "uvesafb: VBIOS/hardware supports both "
-				 "DDC1 and DDC2 transfers\n");
+		pr_info("VBIOS/hardware supports both DDC1 and DDC2 transfers\n");
 	} else if ((task->t.regs.ebx & 0x3) == 2) {
-		printk(KERN_INFO "uvesafb: VBIOS/hardware supports DDC2 "
-				 "transfers\n");
+		pr_info("VBIOS/hardware supports DDC2 transfers\n");
 	} else if ((task->t.regs.ebx & 0x3) == 1) {
-		printk(KERN_INFO "uvesafb: VBIOS/hardware supports DDC1 "
-				 "transfers\n");
+		pr_info("VBIOS/hardware supports DDC1 transfers\n");
 	} else {
-		printk(KERN_INFO "uvesafb: VBIOS/hardware doesn't support "
-				 "DDC transfers\n");
+		pr_info("VBIOS/hardware doesn't support DDC transfers\n");
 		return -EINVAL;
 	}
 
@@ -718,14 +708,12 @@ static void uvesafb_vbe_getmonspecs(struct uvesafb_ktask *task,
 	}
 
 	if (info->monspecs.gtf)
-		printk(KERN_INFO
-			"uvesafb: monitor limits: vf = %d Hz, hf = %d kHz, "
-			"clk = %d MHz\n", info->monspecs.vfmax,
+		pr_info("monitor limits: vf = %d Hz, hf = %d kHz, clk = %d MHz\n",
+			info->monspecs.vfmax,
 			(int)(info->monspecs.hfmax / 1000),
 			(int)(info->monspecs.dclkmax / 1000000));
 	else
-		printk(KERN_INFO "uvesafb: no monitor limits have been set, "
-				 "default refresh rate will be used\n");
+		pr_info("no monitor limits have been set, default refresh rate will be used\n");
 
 	/* Add VBE modes to the modelist. */
 	for (i = 0; i < par->vbe_modes_cnt; i++) {
@@ -779,8 +767,7 @@ static void uvesafb_vbe_getstatesize(struct uvesafb_ktask *task,
 	err = uvesafb_exec(task);
 
 	if (err || (task->t.regs.eax & 0xffff) != 0x004f) {
-		printk(KERN_WARNING "uvesafb: VBE state buffer size "
-			"cannot be determined (eax=0x%x, err=%d)\n",
+		pr_warn("VBE state buffer size cannot be determined (eax=0x%x, err=%d)\n",
 			task->t.regs.eax, err);
 		par->vbe_state_size = 0;
 		return;
@@ -815,8 +802,7 @@ static int uvesafb_vbe_init(struct fb_info *info)
 	if (par->pmi_setpal || par->ypan) {
 		if (__supported_pte_mask & _PAGE_NX) {
 			par->pmi_setpal = par->ypan = 0;
-			printk(KERN_WARNING "uvesafb: NX protection is active, "
-					    "better not use the PMI.\n");
+			pr_warn("NX protection is active, better not use the PMI\n");
 		} else {
 			uvesafb_vbe_getpmi(task, par);
 		}
@@ -859,8 +845,7 @@ static int uvesafb_vbe_init_mode(struct fb_info *info)
 				goto gotmode;
 			}
 		}
-		printk(KERN_INFO "uvesafb: requested VBE mode 0x%x is "
-				 "unavailable\n", vbemode);
+		pr_info("requested VBE mode 0x%x is unavailable\n", vbemode);
 		vbemode = 0;
 	}
 
@@ -1181,8 +1166,8 @@ static int uvesafb_open(struct fb_info *info, int user)
 	if (!cnt && par->vbe_state_size) {
 		buf =  uvesafb_vbe_state_save(par);
 		if (IS_ERR(buf)) {
-			printk(KERN_WARNING "uvesafb: save hardware state"
-				"failed, error code is %ld!\n", PTR_ERR(buf));
+			pr_warn("save hardware state failed, error code is %ld!\n",
+				PTR_ERR(buf));
 		} else {
 			par->vbe_state_orig = buf;
 		}
@@ -1293,17 +1278,16 @@ static int uvesafb_set_par(struct fb_info *info)
 		 * use our own timings.  Try again with the default timings.
 		 */
 		if (crtc != NULL) {
-			printk(KERN_WARNING "uvesafb: mode switch failed "
-				"(eax=0x%x, err=%d). Trying again with "
-				"default timings.\n", task->t.regs.eax, err);
+			pr_warn("mode switch failed (eax=0x%x, err=%d) - trying again with default timings\n",
+				task->t.regs.eax, err);
 			uvesafb_reset(task);
 			kfree(crtc);
 			crtc = NULL;
 			info->var.pixclock = 0;
 			goto setmode;
 		} else {
-			printk(KERN_ERR "uvesafb: mode switch failed (eax="
-				"0x%x, err=%d)\n", task->t.regs.eax, err);
+			pr_err("mode switch failed (eax=0x%x, err=%d)\n",
+			       task->t.regs.eax, err);
 			err = -EINVAL;
 			goto out;
 		}
@@ -1510,13 +1494,11 @@ static void uvesafb_init_info(struct fb_info *info, struct vbe_mode_ib *mode)
 				 mode->bytes_per_scan_line;
 
 	if (par->ypan && info->var.yres_virtual > info->var.yres) {
-		printk(KERN_INFO "uvesafb: scrolling: %s "
-			"using protected mode interface, "
-			"yres_virtual=%d\n",
+		pr_info("scrolling: %s using protected mode interface, yres_virtual=%d\n",
 			(par->ypan > 1) ? "ywrap" : "ypan",
 			info->var.yres_virtual);
 	} else {
-		printk(KERN_INFO "uvesafb: scrolling: redraw\n");
+		pr_info("scrolling: redraw\n");
 		info->var.yres_virtual = info->var.yres;
 		par->ypan = 0;
 	}
@@ -1704,7 +1686,7 @@ static int uvesafb_probe(struct platform_device *dev)
 
 	err = uvesafb_vbe_init(info);
 	if (err) {
-		printk(KERN_ERR "uvesafb: vbe_init() failed with %d\n", err);
+		pr_err("vbe_init() failed with %d\n", err);
 		goto out;
 	}
 
@@ -1726,15 +1708,15 @@ static int uvesafb_probe(struct platform_device *dev)
 	uvesafb_init_info(info, mode);
 
 	if (!request_region(0x3c0, 32, "uvesafb")) {
-		printk(KERN_ERR "uvesafb: request region 0x3c0-0x3e0 failed\n");
+		pr_err("request region 0x3c0-0x3e0 failed\n");
 		err = -EIO;
 		goto out_mode;
 	}
 
 	if (!request_mem_region(info->fix.smem_start, info->fix.smem_len,
 				"uvesafb")) {
-		printk(KERN_ERR "uvesafb: cannot reserve video memory at "
-				"0x%lx\n", info->fix.smem_start);
+		pr_err("cannot reserve video memory at 0x%lx\n",
+		       info->fix.smem_start);
 		err = -EIO;
 		goto out_reg;
 	}
@@ -1743,10 +1725,8 @@ static int uvesafb_probe(struct platform_device *dev)
 	uvesafb_ioremap(info);
 
 	if (!info->screen_base) {
-		printk(KERN_ERR
-			"uvesafb: abort, cannot ioremap 0x%x bytes of video "
-			"memory at 0x%lx\n",
-			info->fix.smem_len, info->fix.smem_start);
+		pr_err("abort, cannot ioremap 0x%x bytes of video memory at 0x%lx\n",
+		       info->fix.smem_len, info->fix.smem_start);
 		err = -EIO;
 		goto out_mem;
 	}
@@ -1754,16 +1734,14 @@ static int uvesafb_probe(struct platform_device *dev)
 	platform_set_drvdata(dev, info);
 
 	if (register_framebuffer(info) < 0) {
-		printk(KERN_ERR
-			"uvesafb: failed to register framebuffer device\n");
+		pr_err("failed to register framebuffer device\n");
 		err = -EINVAL;
 		goto out_unmap;
 	}
 
-	printk(KERN_INFO "uvesafb: framebuffer at 0x%lx, mapped to 0x%p, "
-			"using %dk, total %dk\n", info->fix.smem_start,
-			info->screen_base, info->fix.smem_len/1024,
-			par->vbe_ib.total_memory * 64);
+	pr_info("framebuffer at 0x%lx, mapped to 0x%p, using %dk, total %dk\n",
+		info->fix.smem_start, info->screen_base,
+		info->fix.smem_len / 1024, par->vbe_ib.total_memory * 64);
 	fb_info(info, "%s frame buffer device\n", info->fix.id);
 
 	err = sysfs_create_group(&dev->dev.kobj, &uvesafb_dev_attgrp);
@@ -1871,8 +1849,7 @@ static int uvesafb_setup(char *options)
 		else if (this_opt[0] >= '0' && this_opt[0] <= '9') {
 			mode_option = this_opt;
 		} else {
-			printk(KERN_WARNING
-				"uvesafb: unrecognized option %s\n", this_opt);
+			pr_warn("unrecognized option %s\n", this_opt);
 		}
 	}
 
@@ -1931,8 +1908,7 @@ static int uvesafb_init(void)
 		err = driver_create_file(&uvesafb_driver.driver,
 				&driver_attr_v86d);
 		if (err) {
-			printk(KERN_WARNING "uvesafb: failed to register "
-					"attributes\n");
+			pr_warn("failed to register attributes\n");
 			err = 0;
 		}
 	}

diff --git a/drivers/video/fbdev/vermilion/cr_pll.c b/drivers/video/fbdev/vermilion/cr_pll.c
index ebc6e6e..ba105c8 100644
--- a/drivers/video/fbdev/vermilion/cr_pll.c
+++ b/drivers/video/fbdev/vermilion/cr_pll.c

@@ -185,6 +185,7 @@ static int __init cr_pll_init(void)
 	if (err) {
 		printk(KERN_ERR
 		       "Carillo Ranch failed to initialize vml_sys.\n");
+		iounmap(mch_regs_base);
 		pci_dev_put(mch_dev);
 		return err;
 	}

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index e6e31a1..c722cbf 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig

@@ -46,6 +46,17 @@
 	  get killed. If you say Y here, the watchdog cannot be stopped once
 	  it has been started.
 
+config WATCHDOG_HANDLE_BOOT_ENABLED
+	bool "Update boot-enabled watchdog until userspace takes over"
+	default y
+	help
+	  The default watchdog behaviour (which you get if you say Y here) is
+	  to ping watchdog devices that were enabled before the driver has
+	  been loaded until control is taken over from userspace using the
+	  /dev/watchdog file. If you say N here, the kernel will not update
+	  the watchdog on its own. Thus if your userspace does not start fast
+	  enough your device will reboot.
+
 config WATCHDOG_SYSFS
 	bool "Read different watchdog information through sysfs"
 	help
@@ -721,6 +732,14 @@
 	  This driver adds watchdog support for the integrated watchdogs in the
 	  Renesas R-Car and other SH-Mobile SoCs (usually named RWDT or SWDT).
 
+config RENESAS_RZAWDT
+	tristate "Renesas RZ/A WDT Watchdog"
+	depends on ARCH_RENESAS || COMPILE_TEST
+	select WATCHDOG_CORE
+	help
+	  This driver adds watchdog support for the integrated watchdogs in the
+	  Renesas RZ/A SoCs. These watchdogs can be used to reset a system.
+
 config ASPEED_WATCHDOG
 	tristate "Aspeed 2400 watchdog support"
 	depends on ARCH_ASPEED || COMPILE_TEST
@@ -744,6 +763,30 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called zx2967_wdt.
 
+config STM32_WATCHDOG
+	tristate "STM32 Independent WatchDoG (IWDG) support"
+	depends on ARCH_STM32
+	select WATCHDOG_CORE
+	default y
+	help
+	  Say Y here to include support for the watchdog timer
+	  in stm32 SoCs.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called stm32_iwdg.
+
+config UNIPHIER_WATCHDOG
+	tristate "UniPhier watchdog support"
+	depends on ARCH_UNIPHIER || COMPILE_TEST
+	depends on OF && MFD_SYSCON
+	select WATCHDOG_CORE
+	help
+	  Say Y here to include support for the watchdog timer embedded
+	  into the UniPhier system.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called uniphier_wdt.
+
 # AVR32 Architecture
 
 config AT32AP700X_WDT
@@ -829,11 +872,12 @@
 	  the timeout module parameter.
 
 config F71808E_WDT
-	tristate "Fintek F71808E, F71862FG, F71869, F71882FG and F71889FG Watchdog"
+	tristate "Fintek F718xx, F818xx Super I/O Watchdog"
 	depends on X86
 	help
-	  This is the driver for the hardware watchdog on the Fintek
-	  F71808E, F71862FG, F71869, F71882FG and F71889FG Super I/O controllers.
+	  This is the driver for the hardware watchdog on the Fintek F71808E,
+	  F71862FG, F71868, F71869, F71882FG, F71889FG, F81865 and F81866
+	  Super I/O controllers.
 
 	  You can compile this driver directly into the kernel, or use
 	  it as a module.  The module will be called f71808e_wdt.
@@ -1037,13 +1081,12 @@
 config IT87_WDT
 	tristate "IT87 Watchdog Timer"
 	depends on X86
+	select WATCHDOG_CORE
 	---help---
-	  This is the driver for the hardware watchdog on the ITE IT8620,
-	  IT8702, IT8712, IT8716, IT8718, IT8720, IT8721, IT8726 and IT8728
-	  Super I/O chips.
-
-	  If the driver does not work, then make sure that the game port in
-	  the BIOS is enabled.
+	  This is the driver for the hardware watchdog on the ITE IT8607,
+	  IT8620, IT8622, IT8625, IT8628, IT8655, IT8665, IT8686, IT8702,
+	  IT8712, IT8716, IT8718, IT8720, IT8721, IT8726, IT8728, and
+	  IT8783 Super I/O chips.
 
 	  This watchdog simply watches your kernel to make sure it doesn't
 	  freeze, and if it does, it reboots your computer after a certain

diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index a2126e2..56adf9f 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile

@@ -82,8 +82,11 @@
 obj-$(CONFIG_BCM7038_WDT) += bcm7038_wdt.o
 obj-$(CONFIG_ATLAS7_WATCHDOG) += atlas7_wdt.o
 obj-$(CONFIG_RENESAS_WDT) += renesas_wdt.o
+obj-$(CONFIG_RENESAS_RZAWDT) += rza_wdt.o
 obj-$(CONFIG_ASPEED_WATCHDOG) += aspeed_wdt.o
 obj-$(CONFIG_ZX2967_WATCHDOG) += zx2967_wdt.o
+obj-$(CONFIG_STM32_WATCHDOG) += stm32_iwdg.o
+obj-$(CONFIG_UNIPHIER_WATCHDOG) += uniphier_wdt.o
 
 # AVR32 Architecture
 obj-$(CONFIG_AT32AP700X_WDT) += at32ap700x_wdt.o

diff --git a/drivers/watchdog/bcm47xx_wdt.c b/drivers/watchdog/bcm47xx_wdt.c
index 35725e2..2365828 100644
--- a/drivers/watchdog/bcm47xx_wdt.c
+++ b/drivers/watchdog/bcm47xx_wdt.c

@@ -97,7 +97,7 @@ static int bcm47xx_wdt_restart(struct watchdog_device *wdd,
 	return 0;
 }
 
-static struct watchdog_ops bcm47xx_wdt_hard_ops = {
+static const struct watchdog_ops bcm47xx_wdt_hard_ops = {
 	.owner		= THIS_MODULE,
 	.start		= bcm47xx_wdt_hard_start,
 	.stop		= bcm47xx_wdt_hard_stop,
@@ -168,7 +168,7 @@ static const struct watchdog_info bcm47xx_wdt_info = {
 				WDIOF_MAGICCLOSE,
 };
 
-static struct watchdog_ops bcm47xx_wdt_soft_ops = {
+static const struct watchdog_ops bcm47xx_wdt_soft_ops = {
 	.owner		= THIS_MODULE,
 	.start		= bcm47xx_wdt_soft_start,
 	.stop		= bcm47xx_wdt_soft_stop,

diff --git a/drivers/watchdog/cadence_wdt.c b/drivers/watchdog/cadence_wdt.c
index 86e0b5d..05c0000 100644
--- a/drivers/watchdog/cadence_wdt.c
+++ b/drivers/watchdog/cadence_wdt.c

@@ -458,7 +458,7 @@ static int __maybe_unused cdns_wdt_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(cdns_wdt_pm_ops, cdns_wdt_suspend, cdns_wdt_resume);
 
-static struct of_device_id cdns_wdt_of_match[] = {
+static const struct of_device_id cdns_wdt_of_match[] = {
 	{ .compatible = "cdns,wdt-r1p2", },
 	{ /* end of table */ }
 };

diff --git a/drivers/watchdog/davinci_wdt.c b/drivers/watchdog/davinci_wdt.c
index 0e731d7..2f46487 100644
--- a/drivers/watchdog/davinci_wdt.c
+++ b/drivers/watchdog/davinci_wdt.c

@@ -173,7 +173,11 @@ static int davinci_wdt_probe(struct platform_device *pdev)
 		return PTR_ERR(davinci_wdt->clk);
 	}
 
-	clk_prepare_enable(davinci_wdt->clk);
+	ret = clk_prepare_enable(davinci_wdt->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to prepare clock\n");
+		return ret;
+	}
 
 	platform_set_drvdata(pdev, davinci_wdt);
 
@@ -198,8 +202,10 @@ static int davinci_wdt_probe(struct platform_device *pdev)
 		return PTR_ERR(davinci_wdt->base);
 
 	ret = watchdog_register_device(wdd);
-	if (ret < 0)
+	if (ret < 0) {
+		clk_disable_unprepare(davinci_wdt->clk);
 		dev_err(dev, "cannot register watchdog device\n");
+	}
 
 	return ret;
 }

diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c
index 914da3a..36be987 100644
--- a/drivers/watchdog/dw_wdt.c
+++ b/drivers/watchdog/dw_wdt.c

@@ -29,6 +29,7 @@
 #include <linux/of.h>
 #include <linux/pm.h>
 #include <linux/platform_device.h>
+#include <linux/reset.h>
 #include <linux/watchdog.h>
 
 #define WDOG_CONTROL_REG_OFFSET		    0x00
@@ -54,6 +55,7 @@ struct dw_wdt {
 	struct clk		*clk;
 	unsigned long		rate;
 	struct watchdog_device	wdd;
+	struct reset_control	*rst;
 };
 
 #define to_dw_wdt(wdd)	container_of(wdd, struct dw_wdt, wdd)
@@ -234,6 +236,14 @@ static int dw_wdt_drv_probe(struct platform_device *pdev)
 		goto out_disable_clk;
 	}
 
+	dw_wdt->rst = devm_reset_control_get_optional_shared(&pdev->dev, NULL);
+	if (IS_ERR(dw_wdt->rst)) {
+		ret = PTR_ERR(dw_wdt->rst);
+		goto out_disable_clk;
+	}
+
+	reset_control_deassert(dw_wdt->rst);
+
 	wdd = &dw_wdt->wdd;
 	wdd->info = &dw_wdt_ident;
 	wdd->ops = &dw_wdt_ops;
@@ -279,6 +289,7 @@ static int dw_wdt_drv_remove(struct platform_device *pdev)
 	struct dw_wdt *dw_wdt = platform_get_drvdata(pdev);
 
 	watchdog_unregister_device(&dw_wdt->wdd);
+	reset_control_assert(dw_wdt->rst);
 	clk_disable_unprepare(dw_wdt->clk);
 
 	return 0;

diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c
index 1b7e916..8658dba 100644
--- a/drivers/watchdog/f71808e_wdt.c
+++ b/drivers/watchdog/f71808e_wdt.c

@@ -57,6 +57,7 @@
 #define SIO_F71808_ID		0x0901	/* Chipset ID */
 #define SIO_F71858_ID		0x0507	/* Chipset ID */
 #define SIO_F71862_ID		0x0601	/* Chipset ID */
+#define SIO_F71868_ID		0x1106	/* Chipset ID */
 #define SIO_F71869_ID		0x0814	/* Chipset ID */
 #define SIO_F71869A_ID		0x1007	/* Chipset ID */
 #define SIO_F71882_ID		0x0541	/* Chipset ID */
@@ -101,7 +102,7 @@ MODULE_PARM_DESC(timeout,
 static unsigned int pulse_width = WATCHDOG_PULSE_WIDTH;
 module_param(pulse_width, uint, 0);
 MODULE_PARM_DESC(pulse_width,
-	"Watchdog signal pulse width. 0(=level), 1 ms, 25 ms, 125 ms or 5000 ms"
+	"Watchdog signal pulse width. 0(=level), 1, 25, 30, 125, 150, 5000 or 6000 ms"
 			" (default=" __MODULE_STRING(WATCHDOG_PULSE_WIDTH) ")");
 
 static unsigned int f71862fg_pin = WATCHDOG_F71862FG_PIN;
@@ -119,13 +120,14 @@ module_param(start_withtimeout, uint, 0);
 MODULE_PARM_DESC(start_withtimeout, "Start watchdog timer on module load with"
 	" given initial timeout. Zero (default) disables this feature.");
 
-enum chips { f71808fg, f71858fg, f71862fg, f71869, f71882fg, f71889fg, f81865,
-	     f81866};
+enum chips { f71808fg, f71858fg, f71862fg, f71868, f71869, f71882fg, f71889fg,
+	     f81865, f81866};
 
 static const char *f71808e_names[] = {
 	"f71808fg",
 	"f71858fg",
 	"f71862fg",
+	"f71868",
 	"f71869",
 	"f71882fg",
 	"f71889fg",
@@ -252,16 +254,23 @@ static int watchdog_set_timeout(int timeout)
 static int watchdog_set_pulse_width(unsigned int pw)
 {
 	int err = 0;
+	unsigned int t1 = 25, t2 = 125, t3 = 5000;
+
+	if (watchdog.type == f71868) {
+		t1 = 30;
+		t2 = 150;
+		t3 = 6000;
+	}
 
 	mutex_lock(&watchdog.lock);
 
-	if        (pw <=    1) {
+	if        (pw <=  1) {
 		watchdog.pulse_val = 0;
-	} else if (pw <=   25) {
+	} else if (pw <= t1) {
 		watchdog.pulse_val = 1;
-	} else if (pw <=  125) {
+	} else if (pw <= t2) {
 		watchdog.pulse_val = 2;
-	} else if (pw <= 5000) {
+	} else if (pw <= t3) {
 		watchdog.pulse_val = 3;
 	} else {
 		pr_err("pulse width out of range\n");
@@ -354,6 +363,7 @@ static int watchdog_start(void)
 			goto exit_superio;
 		break;
 
+	case f71868:
 	case f71869:
 		/* GPIO14 --> WDTRST# */
 		superio_clear_bit(watchdog.sioaddr, SIO_REG_MFUNCT1, 4);
@@ -792,6 +802,9 @@ static int __init f71808e_find(int sioaddr)
 		watchdog.type = f71862fg;
 		err = f71862fg_pin_configure(0); /* validate module parameter */
 		break;
+	case SIO_F71868_ID:
+		watchdog.type = f71868;
+		break;
 	case SIO_F71869_ID:
 	case SIO_F71869A_ID:
 		watchdog.type = f71869;

diff --git a/drivers/watchdog/gpio_wdt.c b/drivers/watchdog/gpio_wdt.c
index 93457ca..cb66c2f 100644
--- a/drivers/watchdog/gpio_wdt.c
+++ b/drivers/watchdog/gpio_wdt.c

@@ -18,7 +18,6 @@
 
 #define SOFT_TIMEOUT_MIN	1
 #define SOFT_TIMEOUT_DEF	60
-#define SOFT_TIMEOUT_MAX	0xffff
 
 enum {
 	HW_ALGO_TOGGLE,
@@ -30,11 +29,7 @@ struct gpio_wdt_priv {
 	bool			active_low;
 	bool			state;
 	bool			always_running;
-	bool			armed;
 	unsigned int		hw_algo;
-	unsigned int		hw_margin;
-	unsigned long		last_jiffies;
-	struct timer_list	timer;
 	struct watchdog_device	wdd;
 };
 
@@ -47,21 +42,10 @@ static void gpio_wdt_disable(struct gpio_wdt_priv *priv)
 		gpio_direction_input(priv->gpio);
 }
 
-static void gpio_wdt_hwping(unsigned long data)
+static int gpio_wdt_ping(struct watchdog_device *wdd)
 {
-	struct watchdog_device *wdd = (struct watchdog_device *)data;
 	struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd);
 
-	if (priv->armed && time_after(jiffies, priv->last_jiffies +
-				      msecs_to_jiffies(wdd->timeout * 1000))) {
-		dev_crit(wdd->parent,
-			 "Timer expired. System will reboot soon!\n");
-		return;
-	}
-
-	/* Restart timer */
-	mod_timer(&priv->timer, jiffies + priv->hw_margin);
-
 	switch (priv->hw_algo) {
 	case HW_ALGO_TOGGLE:
 		/* Toggle output pin */
@@ -75,55 +59,33 @@ static void gpio_wdt_hwping(unsigned long data)
 		gpio_set_value_cansleep(priv->gpio, priv->active_low);
 		break;
 	}
-}
-
-static void gpio_wdt_start_impl(struct gpio_wdt_priv *priv)
-{
-	priv->state = priv->active_low;
-	gpio_direction_output(priv->gpio, priv->state);
-	priv->last_jiffies = jiffies;
-	gpio_wdt_hwping((unsigned long)&priv->wdd);
+	return 0;
 }
 
 static int gpio_wdt_start(struct watchdog_device *wdd)
 {
 	struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd);
 
-	gpio_wdt_start_impl(priv);
-	priv->armed = true;
+	priv->state = priv->active_low;
+	gpio_direction_output(priv->gpio, priv->state);
 
-	return 0;
+	set_bit(WDOG_HW_RUNNING, &wdd->status);
+
+	return gpio_wdt_ping(wdd);
 }
 
 static int gpio_wdt_stop(struct watchdog_device *wdd)
 {
 	struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd);
 
-	priv->armed = false;
 	if (!priv->always_running) {
-		mod_timer(&priv->timer, 0);
 		gpio_wdt_disable(priv);
+		clear_bit(WDOG_HW_RUNNING, &wdd->status);
 	}
 
 	return 0;
 }
 
-static int gpio_wdt_ping(struct watchdog_device *wdd)
-{
-	struct gpio_wdt_priv *priv = watchdog_get_drvdata(wdd);
-
-	priv->last_jiffies = jiffies;
-
-	return 0;
-}
-
-static int gpio_wdt_set_timeout(struct watchdog_device *wdd, unsigned int t)
-{
-	wdd->timeout = t;
-
-	return gpio_wdt_ping(wdd);
-}
-
 static const struct watchdog_info gpio_wdt_ident = {
 	.options	= WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING |
 			  WDIOF_SETTIMEOUT,
@@ -135,7 +97,6 @@ static const struct watchdog_ops gpio_wdt_ops = {
 	.start		= gpio_wdt_start,
 	.stop		= gpio_wdt_stop,
 	.ping		= gpio_wdt_ping,
-	.set_timeout	= gpio_wdt_set_timeout,
 };
 
 static int gpio_wdt_probe(struct platform_device *pdev)
@@ -185,9 +146,6 @@ static int gpio_wdt_probe(struct platform_device *pdev)
 	if (hw_margin < 2 || hw_margin > 65535)
 		return -EINVAL;
 
-	/* Use safe value (1/2 of real timeout) */
-	priv->hw_margin = msecs_to_jiffies(hw_margin / 2);
-
 	priv->always_running = of_property_read_bool(pdev->dev.of_node,
 						     "always-running");
 
@@ -196,31 +154,26 @@ static int gpio_wdt_probe(struct platform_device *pdev)
 	priv->wdd.info		= &gpio_wdt_ident;
 	priv->wdd.ops		= &gpio_wdt_ops;
 	priv->wdd.min_timeout	= SOFT_TIMEOUT_MIN;
-	priv->wdd.max_timeout	= SOFT_TIMEOUT_MAX;
+	priv->wdd.max_hw_heartbeat_ms = hw_margin;
 	priv->wdd.parent	= &pdev->dev;
 
 	if (watchdog_init_timeout(&priv->wdd, 0, &pdev->dev) < 0)
 		priv->wdd.timeout = SOFT_TIMEOUT_DEF;
 
-	setup_timer(&priv->timer, gpio_wdt_hwping, (unsigned long)&priv->wdd);
-
 	watchdog_stop_on_reboot(&priv->wdd);
 
-	ret = watchdog_register_device(&priv->wdd);
-	if (ret)
-		return ret;
-
 	if (priv->always_running)
-		gpio_wdt_start_impl(priv);
+		gpio_wdt_start(&priv->wdd);
 
-	return 0;
+	ret = watchdog_register_device(&priv->wdd);
+
+	return ret;
 }
 
 static int gpio_wdt_remove(struct platform_device *pdev)
 {
 	struct gpio_wdt_priv *priv = platform_get_drvdata(pdev);
 
-	del_timer_sync(&priv->timer);
 	watchdog_unregister_device(&priv->wdd);
 
 	return 0;

diff --git a/drivers/watchdog/intel-mid_wdt.c b/drivers/watchdog/intel-mid_wdt.c
index 45e4d02..72c108a 100644
--- a/drivers/watchdog/intel-mid_wdt.c
+++ b/drivers/watchdog/intel-mid_wdt.c

@@ -147,8 +147,21 @@ static int mid_wdt_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	/* Make sure the watchdog is not running */
-	wdt_stop(wdt_dev);
+	/*
+	 * The firmware followed by U-Boot leaves the watchdog running
+	 * with the default threshold which may vary. When we get here
+	 * we should make a decision to prevent any side effects before
+	 * user space daemon will take care of it. The best option,
+	 * taking into consideration that there is no way to read values
+	 * back from hardware, is to enforce watchdog being run with
+	 * deterministic values.
+	 */
+	ret = wdt_start(wdt_dev);
+	if (ret)
+		return ret;
+
+	/* Make sure the watchdog is serviced */
+	set_bit(WDOG_HW_RUNNING, &wdt_dev->status);
 
 	ret = devm_watchdog_register_device(&pdev->dev, wdt_dev);
 	if (ret) {

diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c
index b9878c4..dd1e7ea 100644
--- a/drivers/watchdog/it87_wdt.c
+++ b/drivers/watchdog/it87_wdt.c

@@ -12,8 +12,9 @@
  *		    http://www.ite.com.tw/
  *
  *	Support of the watchdog timers, which are available on
- *	IT8620, IT8702, IT8712, IT8716, IT8718, IT8720, IT8721, IT8726,
- *	IT8728 and IT8783.
+ *	IT8607, IT8620, IT8622, IT8625, IT8628, IT8655, IT8665, IT8686,
+ *	IT8702, IT8712, IT8716, IT8718, IT8720, IT8721, IT8726, IT8728,
+ *	and IT8783.
  *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
@@ -24,38 +25,21 @@
  *	but WITHOUT ANY WARRANTY; without even the implied warranty of
  *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *	GNU General Public License for more details.
- *
- *	You should have received a copy of the GNU General Public License
- *	along with this program; if not, write to the Free Software
- *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/miscdevice.h>
-#include <linux/init.h>
-#include <linux/ioport.h>
 #include <linux/watchdog.h>
-#include <linux/notifier.h>
-#include <linux/reboot.h>
-#include <linux/uaccess.h>
-#include <linux/io.h>
 
-
-#define WATCHDOG_VERSION	"1.14"
 #define WATCHDOG_NAME		"IT87 WDT"
-#define DRIVER_VERSION		WATCHDOG_NAME " driver, v" WATCHDOG_VERSION "\n"
-#define WD_MAGIC		'V'
 
 /* Defaults for Module Parameter */
-#define DEFAULT_NOGAMEPORT	0
-#define DEFAULT_NOCIR		0
-#define DEFAULT_EXCLUSIVE	1
 #define DEFAULT_TIMEOUT		60
 #define DEFAULT_TESTMODE	0
 #define DEFAULT_NOWAYOUT	WATCHDOG_NOWAYOUT
@@ -66,19 +50,22 @@
 
 /* Logical device Numbers LDN */
 #define GPIO		0x07
-#define GAMEPORT	0x09
-#define CIR		0x0a
 
 /* Configuration Registers and Functions */
 #define LDNREG		0x07
 #define CHIPID		0x20
 #define CHIPREV		0x22
-#define ACTREG		0x30
-#define BASEREG		0x60
 
 /* Chip Id numbers */
 #define NO_DEV_ID	0xffff
+#define IT8607_ID	0x8607
 #define IT8620_ID	0x8620
+#define IT8622_ID	0x8622
+#define IT8625_ID	0x8625
+#define IT8628_ID	0x8628
+#define IT8655_ID	0x8655
+#define IT8665_ID	0x8665
+#define IT8686_ID	0x8686
 #define IT8702_ID	0x8702
 #define IT8705_ID	0x8705
 #define IT8712_ID	0x8712
@@ -96,14 +83,6 @@
 #define WDTVALLSB	0x73
 #define WDTVALMSB	0x74
 
-/* GPIO Bits WDTCTRL */
-#define WDT_CIRINT	0x80
-#define WDT_MOUSEINT	0x40
-#define WDT_KYBINT	0x20
-#define WDT_GAMEPORT	0x10 /* not in it8718, it8720, it8721, it8728 */
-#define WDT_FORCE	0x02
-#define WDT_ZERO	0x01
-
 /* GPIO Bits WDTCFG */
 #define WDT_TOV1	0x80
 #define WDT_KRST	0x40
@@ -111,55 +90,12 @@
 #define WDT_PWROK	0x10 /* not in it8721 */
 #define WDT_INT_MASK	0x0f
 
-/* CIR Configuration Register LDN=0x0a */
-#define CIR_ILS		0x70
+static unsigned int max_units, chip_type;
 
-/* The default Base address is not always available, we use this */
-#define CIR_BASE	0x0208
+static unsigned int timeout = DEFAULT_TIMEOUT;
+static int testmode = DEFAULT_TESTMODE;
+static bool nowayout = DEFAULT_NOWAYOUT;
 
-/* CIR Controller */
-#define CIR_DR(b)	(b)
-#define CIR_IER(b)	(b + 1)
-#define CIR_RCR(b)	(b + 2)
-#define CIR_TCR1(b)	(b + 3)
-#define CIR_TCR2(b)	(b + 4)
-#define CIR_TSR(b)	(b + 5)
-#define CIR_RSR(b)	(b + 6)
-#define CIR_BDLR(b)	(b + 5)
-#define CIR_BDHR(b)	(b + 6)
-#define CIR_IIR(b)	(b + 7)
-
-/* Default Base address of Game port */
-#define GP_BASE_DEFAULT	0x0201
-
-/* wdt_status */
-#define WDTS_TIMER_RUN	0
-#define WDTS_DEV_OPEN	1
-#define WDTS_KEEPALIVE	2
-#define WDTS_LOCKED	3
-#define WDTS_USE_GP	4
-#define WDTS_EXPECTED	5
-#define WDTS_USE_CIR	6
-
-static	unsigned int base, gpact, ciract, max_units, chip_type;
-static	unsigned long wdt_status;
-
-static	int nogameport = DEFAULT_NOGAMEPORT;
-static int nocir      = DEFAULT_NOCIR;
-static	int exclusive  = DEFAULT_EXCLUSIVE;
-static	int timeout    = DEFAULT_TIMEOUT;
-static	int testmode   = DEFAULT_TESTMODE;
-static	bool nowayout   = DEFAULT_NOWAYOUT;
-
-module_param(nogameport, int, 0);
-MODULE_PARM_DESC(nogameport, "Forbid the activation of game port, default="
-		__MODULE_STRING(DEFAULT_NOGAMEPORT));
-module_param(nocir, int, 0);
-MODULE_PARM_DESC(nocir, "Forbid the use of Consumer IR interrupts to reset timer, default="
-		__MODULE_STRING(DEFAULT_NOCIR));
-module_param(exclusive, int, 0);
-MODULE_PARM_DESC(exclusive, "Watchdog exclusive device open, default="
-		__MODULE_STRING(DEFAULT_EXCLUSIVE));
 module_param(timeout, int, 0);
 MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds, default="
 		__MODULE_STRING(DEFAULT_TIMEOUT));
@@ -231,26 +167,40 @@ static inline void superio_outw(int val, int reg)
 }
 
 /* Internal function, should be called after superio_select(GPIO) */
-static void wdt_update_timeout(void)
+static void _wdt_update_timeout(unsigned int t)
 {
 	unsigned char cfg = WDT_KRST;
-	int tm = timeout;
 
 	if (testmode)
 		cfg = 0;
 
-	if (tm <= max_units)
+	if (t <= max_units)
 		cfg |= WDT_TOV1;
 	else
-		tm /= 60;
+		t /= 60;
 
 	if (chip_type != IT8721_ID)
 		cfg |= WDT_PWROK;
 
 	superio_outb(cfg, WDTCFG);
-	superio_outb(tm, WDTVALLSB);
+	superio_outb(t, WDTVALLSB);
 	if (max_units > 255)
-		superio_outb(tm>>8, WDTVALMSB);
+		superio_outb(t >> 8, WDTVALMSB);
+}
+
+static int wdt_update_timeout(unsigned int t)
+{
+	int ret;
+
+	ret = superio_enter();
+	if (ret)
+		return ret;
+
+	superio_select(GPIO);
+	_wdt_update_timeout(t);
+	superio_exit();
+
+	return 0;
 }
 
 static int wdt_round_time(int t)
@@ -262,57 +212,14 @@ static int wdt_round_time(int t)
 
 /* watchdog timer handling */
 
-static void wdt_keepalive(void)
+static int wdt_start(struct watchdog_device *wdd)
 {
-	if (test_bit(WDTS_USE_GP, &wdt_status))
-		inb(base);
-	else if (test_bit(WDTS_USE_CIR, &wdt_status))
-		/* The timer reloads with around 5 msec delay */
-		outb(0x55, CIR_DR(base));
-	else {
-		if (superio_enter())
-			return;
-
-		superio_select(GPIO);
-		wdt_update_timeout();
-		superio_exit();
-	}
-	set_bit(WDTS_KEEPALIVE, &wdt_status);
+	return wdt_update_timeout(wdd->timeout);
 }
 
-static int wdt_start(void)
+static int wdt_stop(struct watchdog_device *wdd)
 {
-	int ret = superio_enter();
-	if (ret)
-		return ret;
-
-	superio_select(GPIO);
-	if (test_bit(WDTS_USE_GP, &wdt_status))
-		superio_outb(WDT_GAMEPORT, WDTCTRL);
-	else if (test_bit(WDTS_USE_CIR, &wdt_status))
-		superio_outb(WDT_CIRINT, WDTCTRL);
-	wdt_update_timeout();
-
-	superio_exit();
-
-	return 0;
-}
-
-static int wdt_stop(void)
-{
-	int ret = superio_enter();
-	if (ret)
-		return ret;
-
-	superio_select(GPIO);
-	superio_outb(0x00, WDTCTRL);
-	superio_outb(WDT_TOV1, WDTCFG);
-	superio_outb(0x00, WDTVALLSB);
-	if (max_units > 255)
-		superio_outb(0x00, WDTVALMSB);
-
-	superio_exit();
-	return 0;
+	return wdt_update_timeout(0);
 }
 
 /**
@@ -325,292 +232,44 @@ static int wdt_stop(void)
  *	Used within WDIOC_SETTIMEOUT watchdog device ioctl.
  */
 
-static int wdt_set_timeout(int t)
+static int wdt_set_timeout(struct watchdog_device *wdd, unsigned int t)
 {
-	if (t < 1 || t > max_units * 60)
-		return -EINVAL;
+	int ret = 0;
 
 	if (t > max_units)
-		timeout = wdt_round_time(t);
-	else
-		timeout = t;
+		t = wdt_round_time(t);
 
-	if (test_bit(WDTS_TIMER_RUN, &wdt_status)) {
-		int ret = superio_enter();
-		if (ret)
-			return ret;
+	wdd->timeout = t;
 
-		superio_select(GPIO);
-		wdt_update_timeout();
-		superio_exit();
-	}
-	return 0;
-}
+	if (watchdog_hw_running(wdd))
+		ret = wdt_update_timeout(t);
 
-/**
- *	wdt_get_status - determines the status supported by watchdog ioctl
- *	@status: status returned to user space
- *
- *	The status bit of the device does not allow to distinguish
- *	between a regular system reset and a watchdog forced reset.
- *	But, in test mode it is useful, so it is supported through
- *	WDIOC_GETSTATUS watchdog ioctl. Additionally the driver
- *	reports the keepalive signal and the acception of the magic.
- *
- *	Used within WDIOC_GETSTATUS watchdog device ioctl.
- */
-
-static int wdt_get_status(int *status)
-{
-	*status = 0;
-	if (testmode) {
-		int ret = superio_enter();
-		if (ret)
-			return ret;
-
-		superio_select(GPIO);
-		if (superio_inb(WDTCTRL) & WDT_ZERO) {
-			superio_outb(0x00, WDTCTRL);
-			clear_bit(WDTS_TIMER_RUN, &wdt_status);
-			*status |= WDIOF_CARDRESET;
-		}
-
-		superio_exit();
-	}
-	if (test_and_clear_bit(WDTS_KEEPALIVE, &wdt_status))
-		*status |= WDIOF_KEEPALIVEPING;
-	if (test_bit(WDTS_EXPECTED, &wdt_status))
-		*status |= WDIOF_MAGICCLOSE;
-	return 0;
-}
-
-/* /dev/watchdog handling */
-
-/**
- *	wdt_open - watchdog file_operations .open
- *	@inode: inode of the device
- *	@file: file handle to the device
- *
- *	The watchdog timer starts by opening the device.
- *
- *	Used within the file operation of the watchdog device.
- */
-
-static int wdt_open(struct inode *inode, struct file *file)
-{
-	if (exclusive && test_and_set_bit(WDTS_DEV_OPEN, &wdt_status))
-		return -EBUSY;
-	if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status)) {
-		int ret;
-		if (nowayout && !test_and_set_bit(WDTS_LOCKED, &wdt_status))
-			__module_get(THIS_MODULE);
-
-		ret = wdt_start();
-		if (ret) {
-			clear_bit(WDTS_LOCKED, &wdt_status);
-			clear_bit(WDTS_TIMER_RUN, &wdt_status);
-			clear_bit(WDTS_DEV_OPEN, &wdt_status);
-			return ret;
-		}
-	}
-	return nonseekable_open(inode, file);
-}
-
-/**
- *	wdt_release - watchdog file_operations .release
- *	@inode: inode of the device
- *	@file: file handle to the device
- *
- *	Closing the watchdog device either stops the watchdog timer
- *	or in the case, that nowayout is set or the magic character
- *	wasn't written, a critical warning about an running watchdog
- *	timer is given.
- *
- *	Used within the file operation of the watchdog device.
- */
-
-static int wdt_release(struct inode *inode, struct file *file)
-{
-	if (test_bit(WDTS_TIMER_RUN, &wdt_status)) {
-		if (test_and_clear_bit(WDTS_EXPECTED, &wdt_status)) {
-			int ret = wdt_stop();
-			if (ret) {
-				/*
-				 * Stop failed. Just keep the watchdog alive
-				 * and hope nothing bad happens.
-				 */
-				set_bit(WDTS_EXPECTED, &wdt_status);
-				wdt_keepalive();
-				return ret;
-			}
-			clear_bit(WDTS_TIMER_RUN, &wdt_status);
-		} else {
-			wdt_keepalive();
-			pr_crit("unexpected close, not stopping watchdog!\n");
-		}
-	}
-	clear_bit(WDTS_DEV_OPEN, &wdt_status);
-	return 0;
-}
-
-/**
- *	wdt_write - watchdog file_operations .write
- *	@file: file handle to the watchdog
- *	@buf: buffer to write
- *	@count: count of bytes
- *	@ppos: pointer to the position to write. No seeks allowed
- *
- *	A write to a watchdog device is defined as a keepalive signal. Any
- *	write of data will do, as we don't define content meaning.
- *
- *	Used within the file operation of the watchdog device.
- */
-
-static ssize_t wdt_write(struct file *file, const char __user *buf,
-			    size_t count, loff_t *ppos)
-{
-	if (count) {
-		clear_bit(WDTS_EXPECTED, &wdt_status);
-		wdt_keepalive();
-	}
-	if (!nowayout) {
-		size_t ofs;
-
-	/* note: just in case someone wrote the magic character long ago */
-		for (ofs = 0; ofs != count; ofs++) {
-			char c;
-			if (get_user(c, buf + ofs))
-				return -EFAULT;
-			if (c == WD_MAGIC)
-				set_bit(WDTS_EXPECTED, &wdt_status);
-		}
-	}
-	return count;
+	return ret;
 }
 
 static const struct watchdog_info ident = {
 	.options = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING,
-	.firmware_version =	1,
+	.firmware_version = 1,
 	.identity = WATCHDOG_NAME,
 };
 
-/**
- *	wdt_ioctl - watchdog file_operations .unlocked_ioctl
- *	@file: file handle to the device
- *	@cmd: watchdog command
- *	@arg: argument pointer
- *
- *	The watchdog API defines a common set of functions for all watchdogs
- *	according to their available features.
- *
- *	Used within the file operation of the watchdog device.
- */
-
-static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	int rc = 0, status, new_options, new_timeout;
-	union {
-		struct watchdog_info __user *ident;
-		int __user *i;
-	} uarg;
-
-	uarg.i = (int __user *)arg;
-
-	switch (cmd) {
-	case WDIOC_GETSUPPORT:
-		return copy_to_user(uarg.ident,
-				    &ident, sizeof(ident)) ? -EFAULT : 0;
-
-	case WDIOC_GETSTATUS:
-		rc = wdt_get_status(&status);
-		if (rc)
-			return rc;
-		return put_user(status, uarg.i);
-
-	case WDIOC_GETBOOTSTATUS:
-		return put_user(0, uarg.i);
-
-	case WDIOC_KEEPALIVE:
-		wdt_keepalive();
-		return 0;
-
-	case WDIOC_SETOPTIONS:
-		if (get_user(new_options, uarg.i))
-			return -EFAULT;
-
-		switch (new_options) {
-		case WDIOS_DISABLECARD:
-			if (test_bit(WDTS_TIMER_RUN, &wdt_status)) {
-				rc = wdt_stop();
-				if (rc)
-					return rc;
-			}
-			clear_bit(WDTS_TIMER_RUN, &wdt_status);
-			return 0;
-
-		case WDIOS_ENABLECARD:
-			if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status)) {
-				rc = wdt_start();
-				if (rc) {
-					clear_bit(WDTS_TIMER_RUN, &wdt_status);
-					return rc;
-				}
-			}
-			return 0;
-
-		default:
-			return -EFAULT;
-		}
-
-	case WDIOC_SETTIMEOUT:
-		if (get_user(new_timeout, uarg.i))
-			return -EFAULT;
-		rc = wdt_set_timeout(new_timeout);
-	case WDIOC_GETTIMEOUT:
-		if (put_user(timeout, uarg.i))
-			return -EFAULT;
-		return rc;
-
-	default:
-		return -ENOTTY;
-	}
-}
-
-static int wdt_notify_sys(struct notifier_block *this, unsigned long code,
-	void *unused)
-{
-	if (code == SYS_DOWN || code == SYS_HALT)
-		wdt_stop();
-	return NOTIFY_DONE;
-}
-
-static const struct file_operations wdt_fops = {
-	.owner		= THIS_MODULE,
-	.llseek		= no_llseek,
-	.write		= wdt_write,
-	.unlocked_ioctl	= wdt_ioctl,
-	.open		= wdt_open,
-	.release	= wdt_release,
+static struct watchdog_ops wdt_ops = {
+	.owner = THIS_MODULE,
+	.start = wdt_start,
+	.stop = wdt_stop,
+	.set_timeout = wdt_set_timeout,
 };
 
-static struct miscdevice wdt_miscdev = {
-	.minor		= WATCHDOG_MINOR,
-	.name		= "watchdog",
-	.fops		= &wdt_fops,
-};
-
-static struct notifier_block wdt_notifier = {
-	.notifier_call = wdt_notify_sys,
+static struct watchdog_device wdt_dev = {
+	.info = &ident,
+	.ops = &wdt_ops,
+	.min_timeout = 1,
 };
 
 static int __init it87_wdt_init(void)
 {
-	int rc = 0;
-	int try_gameport = !nogameport;
 	u8  chip_rev;
-	int gp_rreq_fail = 0;
-
-	wdt_status = 0;
+	int rc;
 
 	rc = superio_enter();
 	if (rc)
@@ -631,14 +290,20 @@ static int __init it87_wdt_init(void)
 	case IT8726_ID:
 		max_units = 65535;
 		break;
+	case IT8607_ID:
 	case IT8620_ID:
+	case IT8622_ID:
+	case IT8625_ID:
+	case IT8628_ID:
+	case IT8655_ID:
+	case IT8665_ID:
+	case IT8686_ID:
 	case IT8718_ID:
 	case IT8720_ID:
 	case IT8721_ID:
 	case IT8728_ID:
 	case IT8783_ID:
 		max_units = 65535;
-		try_gameport = 0;
 		break;
 	case IT8705_ID:
 		pr_err("Unsupported Chip found, Chip %04x Revision %02x\n",
@@ -660,48 +325,7 @@ static int __init it87_wdt_init(void)
 	superio_select(GPIO);
 	superio_outb(WDT_TOV1, WDTCFG);
 	superio_outb(0x00, WDTCTRL);
-
-	/* First try to get Gameport support */
-	if (try_gameport) {
-		superio_select(GAMEPORT);
-		base = superio_inw(BASEREG);
-		if (!base) {
-			base = GP_BASE_DEFAULT;
-			superio_outw(base, BASEREG);
-		}
-		gpact = superio_inb(ACTREG);
-		superio_outb(0x01, ACTREG);
-		if (request_region(base, 1, WATCHDOG_NAME))
-			set_bit(WDTS_USE_GP, &wdt_status);
-		else
-			gp_rreq_fail = 1;
-	}
-
-	/* If we haven't Gameport support, try to get CIR support */
-	if (!nocir && !test_bit(WDTS_USE_GP, &wdt_status)) {
-		if (!request_region(CIR_BASE, 8, WATCHDOG_NAME)) {
-			if (gp_rreq_fail)
-				pr_err("I/O Address 0x%04x and 0x%04x already in use\n",
-				       base, CIR_BASE);
-			else
-				pr_err("I/O Address 0x%04x already in use\n",
-				       CIR_BASE);
-			rc = -EIO;
-			goto err_out;
-		}
-		base = CIR_BASE;
-
-		superio_select(CIR);
-		superio_outw(base, BASEREG);
-		superio_outb(0x00, CIR_ILS);
-		ciract = superio_inb(ACTREG);
-		superio_outb(0x01, ACTREG);
-		if (gp_rreq_fail) {
-			superio_select(GAMEPORT);
-			superio_outb(gpact, ACTREG);
-		}
-		set_bit(WDTS_USE_CIR, &wdt_status);
-	}
+	superio_exit();
 
 	if (timeout < 1 || timeout > max_units * 60) {
 		timeout = DEFAULT_TIMEOUT;
@@ -712,83 +336,25 @@ static int __init it87_wdt_init(void)
 	if (timeout > max_units)
 		timeout = wdt_round_time(timeout);
 
-	rc = register_reboot_notifier(&wdt_notifier);
+	wdt_dev.timeout = timeout;
+	wdt_dev.max_timeout = max_units * 60;
+
+	watchdog_stop_on_reboot(&wdt_dev);
+	rc = watchdog_register_device(&wdt_dev);
 	if (rc) {
-		pr_err("Cannot register reboot notifier (err=%d)\n", rc);
-		goto err_out_region;
+		pr_err("Cannot register watchdog device (err=%d)\n", rc);
+		return rc;
 	}
 
-	rc = misc_register(&wdt_miscdev);
-	if (rc) {
-		pr_err("Cannot register miscdev on minor=%d (err=%d)\n",
-		       wdt_miscdev.minor, rc);
-		goto err_out_reboot;
-	}
+	pr_info("Chip IT%04x revision %d initialized. timeout=%d sec (nowayout=%d testmode=%d)\n",
+		chip_type, chip_rev, timeout, nowayout, testmode);
 
-	/* Initialize CIR to use it as keepalive source */
-	if (test_bit(WDTS_USE_CIR, &wdt_status)) {
-		outb(0x00, CIR_RCR(base));
-		outb(0xc0, CIR_TCR1(base));
-		outb(0x5c, CIR_TCR2(base));
-		outb(0x10, CIR_IER(base));
-		outb(0x00, CIR_BDHR(base));
-		outb(0x01, CIR_BDLR(base));
-		outb(0x09, CIR_IER(base));
-	}
-
-	pr_info("Chip IT%04x revision %d initialized. timeout=%d sec (nowayout=%d testmode=%d exclusive=%d nogameport=%d nocir=%d)\n",
-		chip_type, chip_rev, timeout,
-		nowayout, testmode, exclusive, nogameport, nocir);
-
-	superio_exit();
 	return 0;
-
-err_out_reboot:
-	unregister_reboot_notifier(&wdt_notifier);
-err_out_region:
-	if (test_bit(WDTS_USE_GP, &wdt_status))
-		release_region(base, 1);
-	else if (test_bit(WDTS_USE_CIR, &wdt_status)) {
-		release_region(base, 8);
-		superio_select(CIR);
-		superio_outb(ciract, ACTREG);
-	}
-err_out:
-	if (try_gameport) {
-		superio_select(GAMEPORT);
-		superio_outb(gpact, ACTREG);
-	}
-
-	superio_exit();
-	return rc;
 }
 
 static void __exit it87_wdt_exit(void)
 {
-	if (superio_enter() == 0) {
-		superio_select(GPIO);
-		superio_outb(0x00, WDTCTRL);
-		superio_outb(0x00, WDTCFG);
-		superio_outb(0x00, WDTVALLSB);
-		if (max_units > 255)
-			superio_outb(0x00, WDTVALMSB);
-		if (test_bit(WDTS_USE_GP, &wdt_status)) {
-			superio_select(GAMEPORT);
-			superio_outb(gpact, ACTREG);
-		} else if (test_bit(WDTS_USE_CIR, &wdt_status)) {
-			superio_select(CIR);
-			superio_outb(ciract, ACTREG);
-		}
-		superio_exit();
-	}
-
-	misc_deregister(&wdt_miscdev);
-	unregister_reboot_notifier(&wdt_notifier);
-
-	if (test_bit(WDTS_USE_GP, &wdt_status))
-		release_region(base, 1);
-	else if (test_bit(WDTS_USE_CIR, &wdt_status))
-		release_region(base, 8);
+	watchdog_unregister_device(&wdt_dev);
 }
 
 module_init(it87_wdt_init);

diff --git a/drivers/watchdog/meson_gxbb_wdt.c b/drivers/watchdog/meson_gxbb_wdt.c
index 45d4766..69a5a57 100644
--- a/drivers/watchdog/meson_gxbb_wdt.c
+++ b/drivers/watchdog/meson_gxbb_wdt.c

@@ -203,7 +203,9 @@ static int meson_gxbb_wdt_probe(struct platform_device *pdev)
 	if (IS_ERR(data->clk))
 		return PTR_ERR(data->clk);
 
-	clk_prepare_enable(data->clk);
+	ret = clk_prepare_enable(data->clk);
+	if (ret)
+		return ret;
 
 	platform_set_drvdata(pdev, data);
 

diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c
index 39be4dd..83af7d6 100644
--- a/drivers/watchdog/orion_wdt.c
+++ b/drivers/watchdog/orion_wdt.c

@@ -651,5 +651,5 @@ module_param(nowayout, bool, 0);
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
 				__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
 
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("platform:orion_wdt");

diff --git a/drivers/watchdog/rza_wdt.c b/drivers/watchdog/rza_wdt.c
new file mode 100644
index 0000000..e618218
--- /dev/null
+++ b/drivers/watchdog/rza_wdt.c

@@ -0,0 +1,199 @@
+/*
+ * Renesas RZ/A Series WDT Driver
+ *
+ * Copyright (C) 2017 Renesas Electronics America, Inc.
+ * Copyright (C) 2017 Chris Brandt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+
+#define DEFAULT_TIMEOUT		30
+
+/* Watchdog Timer Registers */
+#define WTCSR			0
+#define WTCSR_MAGIC		0xA500
+#define WTSCR_WT		BIT(6)
+#define WTSCR_TME		BIT(5)
+#define WTSCR_CKS(i)		(i)
+
+#define WTCNT			2
+#define WTCNT_MAGIC		0x5A00
+
+#define WRCSR			4
+#define WRCSR_MAGIC		0x5A00
+#define WRCSR_RSTE		BIT(6)
+#define WRCSR_CLEAR_WOVF	0xA500	/* special value */
+
+struct rza_wdt {
+	struct watchdog_device wdev;
+	void __iomem *base;
+	struct clk *clk;
+};
+
+static int rza_wdt_start(struct watchdog_device *wdev)
+{
+	struct rza_wdt *priv = watchdog_get_drvdata(wdev);
+
+	/* Stop timer */
+	writew(WTCSR_MAGIC | 0, priv->base + WTCSR);
+
+	/* Must dummy read WRCSR:WOVF at least once before clearing */
+	readb(priv->base + WRCSR);
+	writew(WRCSR_CLEAR_WOVF, priv->base + WRCSR);
+
+	/*
+	 * Start timer with slowest clock source and reset option enabled.
+	 */
+	writew(WRCSR_MAGIC | WRCSR_RSTE, priv->base + WRCSR);
+	writew(WTCNT_MAGIC | 0, priv->base + WTCNT);
+	writew(WTCSR_MAGIC | WTSCR_WT | WTSCR_TME | WTSCR_CKS(7),
+	       priv->base + WTCSR);
+
+	return 0;
+}
+
+static int rza_wdt_stop(struct watchdog_device *wdev)
+{
+	struct rza_wdt *priv = watchdog_get_drvdata(wdev);
+
+	writew(WTCSR_MAGIC | 0, priv->base + WTCSR);
+
+	return 0;
+}
+
+static int rza_wdt_ping(struct watchdog_device *wdev)
+{
+	struct rza_wdt *priv = watchdog_get_drvdata(wdev);
+
+	writew(WTCNT_MAGIC | 0, priv->base + WTCNT);
+
+	return 0;
+}
+
+static int rza_wdt_restart(struct watchdog_device *wdev, unsigned long action,
+			    void *data)
+{
+	struct rza_wdt *priv = watchdog_get_drvdata(wdev);
+
+	/* Stop timer */
+	writew(WTCSR_MAGIC | 0, priv->base + WTCSR);
+
+	/* Must dummy read WRCSR:WOVF at least once before clearing */
+	readb(priv->base + WRCSR);
+	writew(WRCSR_CLEAR_WOVF, priv->base + WRCSR);
+
+	/*
+	 * Start timer with fastest clock source and only 1 clock left before
+	 * overflow with reset option enabled.
+	 */
+	writew(WRCSR_MAGIC | WRCSR_RSTE, priv->base + WRCSR);
+	writew(WTCNT_MAGIC | 255, priv->base + WTCNT);
+	writew(WTCSR_MAGIC | WTSCR_WT | WTSCR_TME, priv->base + WTCSR);
+
+	/*
+	 * Actually make sure the above sequence hits hardware before sleeping.
+	 */
+	wmb();
+
+	/* Wait for WDT overflow (reset) */
+	udelay(20);
+
+	return 0;
+}
+
+static const struct watchdog_info rza_wdt_ident = {
+	.options = WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT,
+	.identity = "Renesas RZ/A WDT Watchdog",
+};
+
+static const struct watchdog_ops rza_wdt_ops = {
+	.owner = THIS_MODULE,
+	.start = rza_wdt_start,
+	.stop = rza_wdt_stop,
+	.ping = rza_wdt_ping,
+	.restart = rza_wdt_restart,
+};
+
+static int rza_wdt_probe(struct platform_device *pdev)
+{
+	struct rza_wdt *priv;
+	struct resource *res;
+	unsigned long rate;
+	int ret;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk))
+		return PTR_ERR(priv->clk);
+
+	rate = clk_get_rate(priv->clk);
+	if (rate < 16384) {
+		dev_err(&pdev->dev, "invalid clock rate (%ld)\n", rate);
+		return -ENOENT;
+	}
+
+	/* Assume slowest clock rate possible (CKS=7) */
+	rate /= 16384;
+
+	priv->wdev.info = &rza_wdt_ident,
+	priv->wdev.ops = &rza_wdt_ops,
+	priv->wdev.parent = &pdev->dev;
+
+	/*
+	 * Since the max possible timeout of our 8-bit count register is less
+	 * than a second, we must use max_hw_heartbeat_ms.
+	 */
+	priv->wdev.max_hw_heartbeat_ms = (1000 * U8_MAX) / rate;
+	dev_dbg(&pdev->dev, "max hw timeout of %dms\n",
+		 priv->wdev.max_hw_heartbeat_ms);
+
+	priv->wdev.min_timeout = 1;
+	priv->wdev.timeout = DEFAULT_TIMEOUT;
+
+	watchdog_init_timeout(&priv->wdev, 0, &pdev->dev);
+	watchdog_set_drvdata(&priv->wdev, priv);
+
+	ret = devm_watchdog_register_device(&pdev->dev, &priv->wdev);
+	if (ret)
+		dev_err(&pdev->dev, "Cannot register watchdog device\n");
+
+	return ret;
+}
+
+static const struct of_device_id rza_wdt_of_match[] = {
+	{ .compatible = "renesas,rza-wdt", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, rza_wdt_of_match);
+
+static struct platform_driver rza_wdt_driver = {
+	.probe = rza_wdt_probe,
+	.driver = {
+		.name = "rza_wdt",
+		.of_match_table = rza_wdt_of_match,
+	},
+};
+
+module_platform_driver(rza_wdt_driver);
+
+MODULE_DESCRIPTION("Renesas RZ/A WDT Driver");
+MODULE_AUTHOR("Chris Brandt <chris.brandt@renesas.com>");
+MODULE_LICENSE("GPL v2");

diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c
index 6ed9759..adaa435 100644
--- a/drivers/watchdog/s3c2410_wdt.c
+++ b/drivers/watchdog/s3c2410_wdt.c

@@ -1,5 +1,4 @@
-/* linux/drivers/char/watchdog/s3c2410_wdt.c
- *
+/*
  * Copyright (c) 2004 Simtec Electronics
  *	Ben Dooks <ben@simtec.co.uk>
  *
@@ -17,11 +16,7 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
+ */
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -37,6 +32,7 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
 #include <linux/delay.h>
@@ -94,8 +90,7 @@ MODULE_PARM_DESC(tmr_atboot,
 			__MODULE_STRING(S3C2410_WATCHDOG_ATBOOT));
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
 			__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-MODULE_PARM_DESC(soft_noboot, "Watchdog action, set to 1 to ignore reboots, "
-			"0 to reboot (default 0)");
+MODULE_PARM_DESC(soft_noboot, "Watchdog action, set to 1 to ignore reboots, 0 to reboot (default 0)");
 
 /**
  * struct s3c2410_wdt_variant - Per-variant config data
@@ -131,7 +126,7 @@ struct s3c2410_wdt {
 	unsigned long		wtdat_save;
 	struct watchdog_device	wdt_device;
 	struct notifier_block	freq_transition;
-	struct s3c2410_wdt_variant *drv_data;
+	const struct s3c2410_wdt_variant *drv_data;
 	struct regmap *pmureg;
 };
 
@@ -310,7 +305,8 @@ static inline int s3c2410wdt_is_running(struct s3c2410_wdt *wdt)
 	return readl(wdt->reg_base + S3C2410_WTCON) & S3C2410_WTCON_ENABLE;
 }
 
-static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd, unsigned timeout)
+static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd,
+				    unsigned int timeout)
 {
 	struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
 	unsigned long freq = clk_get_rate(wdt->clock);
@@ -401,7 +397,7 @@ static const struct watchdog_ops s3c2410wdt_ops = {
 	.restart = s3c2410wdt_restart,
 };
 
-static struct watchdog_device s3c2410_wdd = {
+static const struct watchdog_device s3c2410_wdd = {
 	.info = &s3c2410_wdt_ident,
 	.ops = &s3c2410wdt_ops,
 	.timeout = S3C2410_WATCHDOG_DEFAULT_TIME,
@@ -507,22 +503,24 @@ static inline unsigned int s3c2410wdt_get_bootstatus(struct s3c2410_wdt *wdt)
 	return 0;
 }
 
-static inline struct s3c2410_wdt_variant *
+static inline const struct s3c2410_wdt_variant *
 s3c2410_get_wdt_drv_data(struct platform_device *pdev)
 {
-	if (pdev->dev.of_node) {
-		const struct of_device_id *match;
-		match = of_match_node(s3c2410_wdt_match, pdev->dev.of_node);
-		return (struct s3c2410_wdt_variant *)match->data;
-	} else {
-		return (struct s3c2410_wdt_variant *)
-			platform_get_device_id(pdev)->driver_data;
+	const struct s3c2410_wdt_variant *variant;
+
+	variant = of_device_get_match_data(&pdev->dev);
+	if (!variant) {
+		/* Device matched by platform_device_id */
+		variant = (struct s3c2410_wdt_variant *)
+			   platform_get_device_id(pdev)->driver_data;
 	}
+
+	return variant;
 }
 
 static int s3c2410wdt_probe(struct platform_device *pdev)
 {
-	struct device *dev;
+	struct device *dev = &pdev->dev;
 	struct s3c2410_wdt *wdt;
 	struct resource *wdt_mem;
 	struct resource *wdt_irq;
@@ -530,13 +528,11 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
 	int started = 0;
 	int ret;
 
-	dev = &pdev->dev;
-
 	wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
 	if (!wdt)
 		return -ENOMEM;
 
-	wdt->dev = &pdev->dev;
+	wdt->dev = dev;
 	spin_lock_init(&wdt->lock);
 	wdt->wdt_device = s3c2410_wdd;
 
@@ -592,7 +588,7 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
 	/* see if we can actually set the requested timer margin, and if
 	 * not, try the default value */
 
-	watchdog_init_timeout(&wdt->wdt_device, tmr_margin, &pdev->dev);
+	watchdog_init_timeout(&wdt->wdt_device, tmr_margin, dev);
 	ret = s3c2410wdt_set_heartbeat(&wdt->wdt_device,
 					wdt->wdt_device.timeout);
 	if (ret) {
@@ -601,11 +597,10 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
 
 		if (started == 0)
 			dev_info(dev,
-			   "tmr_margin value out of range, default %d used\n",
-			       S3C2410_WATCHDOG_DEFAULT_TIME);
+				 "tmr_margin value out of range, default %d used\n",
+				 S3C2410_WATCHDOG_DEFAULT_TIME);
 		else
-			dev_info(dev, "default timer value is out of range, "
-							"cannot start\n");
+			dev_info(dev, "default timer value is out of range, cannot start\n");
 	}
 
 	ret = devm_request_irq(dev, wdt_irq->start, s3c2410wdt_irq, 0,
@@ -619,7 +614,7 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
 	watchdog_set_restart_priority(&wdt->wdt_device, 128);
 
 	wdt->wdt_device.bootstatus = s3c2410wdt_get_bootstatus(wdt);
-	wdt->wdt_device.parent = &pdev->dev;
+	wdt->wdt_device.parent = dev;
 
 	ret = watchdog_register_device(&wdt->wdt_device);
 	if (ret) {
@@ -754,7 +749,6 @@ static struct platform_driver s3c2410wdt_driver = {
 
 module_platform_driver(s3c2410wdt_driver);
 
-MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>, "
-	      "Dimitry Andric <dimitry.andric@tomtom.com>");
+MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>, Dimitry Andric <dimitry.andric@tomtom.com>");
 MODULE_DESCRIPTION("S3C2410 Watchdog Device Driver");
 MODULE_LICENSE("GPL");

diff --git a/drivers/watchdog/sama5d4_wdt.c b/drivers/watchdog/sama5d4_wdt.c
index 362fd22..0ae947c 100644
--- a/drivers/watchdog/sama5d4_wdt.c
+++ b/drivers/watchdog/sama5d4_wdt.c

@@ -228,15 +228,13 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
 
 	wdt->reg_base = regs;
 
-	if (pdev->dev.of_node) {
-		irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
-		if (!irq)
-			dev_warn(&pdev->dev, "failed to get IRQ from DT\n");
+	irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+	if (!irq)
+		dev_warn(&pdev->dev, "failed to get IRQ from DT\n");
 
-		ret = of_sama5d4_wdt_init(pdev->dev.of_node, wdt);
-		if (ret)
-			return ret;
-	}
+	ret = of_sama5d4_wdt_init(pdev->dev.of_node, wdt);
+	if (ret)
+		return ret;
 
 	if ((wdt->mr & AT91_WDT_WDFIEN) && irq) {
 		ret = devm_request_irq(&pdev->dev, irq, sama5d4_wdt_irq_handler,
@@ -302,6 +300,11 @@ static int sama5d4_wdt_resume(struct device *dev)
 {
 	struct sama5d4_wdt *wdt = dev_get_drvdata(dev);
 
+	/*
+	 * FIXME: writing MR also pings the watchdog which may not be desired.
+	 * This should only be done when the registers are lost on suspend but
+	 * there is no way to get this information right now.
+	 */
 	sama5d4_wdt_init(wdt);
 
 	return 0;

diff --git a/drivers/watchdog/stm32_iwdg.c b/drivers/watchdog/stm32_iwdg.c
new file mode 100644
index 0000000..6c501b7
--- /dev/null
+++ b/drivers/watchdog/stm32_iwdg.c

@@ -0,0 +1,253 @@
+/*
+ * Driver for STM32 Independent Watchdog
+ *
+ * Copyright (C) Yannick Fertre 2017
+ * Author: Yannick Fertre <yannick.fertre@st.com>
+ *
+ * This driver is based on tegra_wdt.c
+ *
+ * License terms:  GNU General Public License (GPL), version 2
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+
+/* IWDG registers */
+#define IWDG_KR		0x00 /* Key register */
+#define IWDG_PR		0x04 /* Prescaler Register */
+#define IWDG_RLR	0x08 /* ReLoad Register */
+#define IWDG_SR		0x0C /* Status Register */
+#define IWDG_WINR	0x10 /* Window Register */
+
+/* IWDG_KR register bit mask */
+#define KR_KEY_RELOAD	0xAAAA /* reload counter enable */
+#define KR_KEY_ENABLE	0xCCCC /* peripheral enable */
+#define KR_KEY_EWA	0x5555 /* write access enable */
+#define KR_KEY_DWA	0x0000 /* write access disable */
+
+/* IWDG_PR register bit values */
+#define PR_4		0x00 /* prescaler set to 4 */
+#define PR_8		0x01 /* prescaler set to 8 */
+#define PR_16		0x02 /* prescaler set to 16 */
+#define PR_32		0x03 /* prescaler set to 32 */
+#define PR_64		0x04 /* prescaler set to 64 */
+#define PR_128		0x05 /* prescaler set to 128 */
+#define PR_256		0x06 /* prescaler set to 256 */
+
+/* IWDG_RLR register values */
+#define RLR_MIN		0x07C /* min value supported by reload register */
+#define RLR_MAX		0xFFF /* max value supported by reload register */
+
+/* IWDG_SR register bit mask */
+#define FLAG_PVU	BIT(0) /* Watchdog prescaler value update */
+#define FLAG_RVU	BIT(1) /* Watchdog counter reload value update */
+
+/* set timeout to 100000 us */
+#define TIMEOUT_US	100000
+#define SLEEP_US	1000
+
+struct stm32_iwdg {
+	struct watchdog_device	wdd;	/* registered with the watchdog core */
+	void __iomem		*regs;	/* ioremapped IWDG register base */
+	struct clk		*clk;	/* enabled in probe, disabled in remove */
+	unsigned int		rate;	/* clk_get_rate(clk), cached at probe */
+};
+
+static inline u32 reg_read(void __iomem *base, u32 reg)
+{
+	return readl_relaxed(base + reg);	/* NOTE(review): unused here? */
+}
+
+static inline void reg_write(void __iomem *base, u32 reg, u32 val)
+{
+	writel_relaxed(val, base + reg);	/* reg is a byte offset from base */
+}
+
+/* Program prescaler and reload value, enable the IWDG, then reload it. */
+static int stm32_iwdg_start(struct watchdog_device *wdd)
+{
+	struct stm32_iwdg *wdt = watchdog_get_drvdata(wdd);
+	u32 val = FLAG_PVU | FLAG_RVU;
+	u32 reload;
+	int ret;
+
+	dev_dbg(wdd->parent, "%s\n", __func__);
+
+	/* prescaler is 256 (>> 8); u64 so timeout * rate cannot overflow */
+	reload = clamp_t(u64, (((u64)wdd->timeout * wdt->rate) >> 8) - 1,
+			 RLR_MIN, RLR_MAX);
+
+	/* enable write access */
+	reg_write(wdt->regs, IWDG_KR, KR_KEY_EWA);
+
+	/* set prescaler & reload registers */
+	reg_write(wdt->regs, IWDG_PR, PR_256); /* prescaler fixed to 256 */
+	reg_write(wdt->regs, IWDG_RLR, reload);
+	reg_write(wdt->regs, IWDG_KR, KR_KEY_ENABLE);
+
+	/* wait for the registers to be updated (max 100ms) */
+	ret = readl_relaxed_poll_timeout(wdt->regs + IWDG_SR, val,
+					 !(val & (FLAG_PVU | FLAG_RVU)),
+					 SLEEP_US, TIMEOUT_US);
+	if (ret) {
+		dev_err(wdd->parent, "Fail to set prescaler or reload registers\n");
+		return ret;
+	}
+
+	/* reload watchdog */
+	reg_write(wdt->regs, IWDG_KR, KR_KEY_RELOAD);
+
+	return 0;
+}
+
+static int stm32_iwdg_ping(struct watchdog_device *wdd)
+{
+	struct stm32_iwdg *wdt = watchdog_get_drvdata(wdd);
+
+	dev_dbg(wdd->parent, "%s\n", __func__);
+
+	/* reload watchdog: write the reload key to the key register */
+	reg_write(wdt->regs, IWDG_KR, KR_KEY_RELOAD);
+
+	return 0;	/* reg_write() returns nothing, so always success */
+}
+
+static int stm32_iwdg_set_timeout(struct watchdog_device *wdd,
+				  unsigned int timeout)
+{
+	dev_dbg(wdd->parent, "%s timeout: %u sec\n", __func__, timeout);
+
+	wdd->timeout = timeout;
+
+	if (watchdog_active(wdd))	/* reprogram running hardware */
+		return stm32_iwdg_start(wdd);
+
+	return 0;
+}
+
+static const struct watchdog_info stm32_iwdg_info = {
+	.options	= WDIOF_SETTIMEOUT |
+			  WDIOF_MAGICCLOSE |
+			  WDIOF_KEEPALIVEPING,
+	.identity	= "STM32 Independent Watchdog",
+};
+
+static const struct watchdog_ops stm32_iwdg_ops = {	/* no .stop op */
+	.owner		= THIS_MODULE,
+	.start		= stm32_iwdg_start,
+	.ping		= stm32_iwdg_ping,
+	.set_timeout	= stm32_iwdg_set_timeout,
+};
+
+/* Map registers, enable the clock and register the watchdog device. */
+static int stm32_iwdg_probe(struct platform_device *pdev)
+{
+	struct watchdog_device *wdd;
+	struct stm32_iwdg *wdt;
+	struct resource *res;
+	void __iomem *regs;
+	struct clk *clk;
+	int ret;
+
+	/* Allocate driver data; struct watchdog_device is nested within it. */
+	wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
+	if (!wdt)
+		return -ENOMEM;
+
+	/* This is the timer base. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(regs)) {
+		dev_err(&pdev->dev, "Could not get resource\n");
+		return PTR_ERR(regs);
+	}
+
+	clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "Unable to get clock\n");
+		return PTR_ERR(clk);
+	}
+
+	ret = clk_prepare_enable(clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to prepare clock\n");
+		return ret;
+	}
+
+	/* Initialize struct stm32_iwdg; rate is a divisor below, reject 0. */
+	wdt->regs = regs;
+	wdt->clk = clk;
+	wdt->rate = clk_get_rate(clk);
+	if (!wdt->rate) {
+		dev_err(&pdev->dev, "Invalid clock rate\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	/* Initialize struct watchdog_device. */
+	wdd = &wdt->wdd;
+	wdd->info = &stm32_iwdg_info;
+	wdd->ops = &stm32_iwdg_ops;
+	wdd->min_timeout = ((RLR_MIN + 1) * 256) / wdt->rate;
+	wdd->max_hw_heartbeat_ms = ((RLR_MAX + 1) * 256 * 1000) / wdt->rate;
+	/* default kept when no valid timeout is supplied: the hardware max */
+	wdd->timeout = wdd->max_hw_heartbeat_ms / 1000;
+	wdd->parent = &pdev->dev;
+
+	watchdog_set_drvdata(wdd, wdt);
+	watchdog_set_nowayout(wdd, WATCHDOG_NOWAYOUT);
+
+	ret = watchdog_init_timeout(wdd, 0, &pdev->dev);
+	if (ret)
+		dev_warn(&pdev->dev, "unable to set timeout value, using default\n");
+
+	ret = watchdog_register_device(wdd);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register watchdog device\n");
+		goto err;
+	}
+
+	platform_set_drvdata(pdev, wdt);
+	return 0;
+err:
+	clk_disable_unprepare(clk);
+	return ret;
+}
+
+static int stm32_iwdg_remove(struct platform_device *pdev)
+{
+	struct stm32_iwdg *wdt = platform_get_drvdata(pdev);	/* set in probe */
+
+	watchdog_unregister_device(&wdt->wdd);
+	clk_disable_unprepare(wdt->clk);	/* undo clk_prepare_enable() */
+
+	return 0;
+}
+
+static const struct of_device_id stm32_iwdg_of_match[] = {
+	{ .compatible = "st,stm32-iwdg" },
+	{ /* end node */ }
+};
+MODULE_DEVICE_TABLE(of, stm32_iwdg_of_match);
+
+static struct platform_driver stm32_iwdg_driver = {
+	.probe		= stm32_iwdg_probe,
+	.remove		= stm32_iwdg_remove,
+	.driver = {
+		.name	= "iwdg",
+		.of_match_table = stm32_iwdg_of_match,
+	},
+};
+module_platform_driver(stm32_iwdg_driver);
+
+MODULE_AUTHOR("Yannick Fertre <yannick.fertre@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics STM32 Independent Watchdog Driver");
+MODULE_LICENSE("GPL v2");

diff --git a/drivers/watchdog/uniphier_wdt.c b/drivers/watchdog/uniphier_wdt.c
new file mode 100644
index 0000000..0ea2339
--- /dev/null
+++ b/drivers/watchdog/uniphier_wdt.c

@@ -0,0 +1,268 @@
+/*
+ * Watchdog driver for the UniPhier watchdog timer
+ *
+ * (c) Copyright 2014 Panasonic Corporation
+ * (c) Copyright 2016 Socionext Inc.
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/bitops.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/watchdog.h>
+
+/* WDT timer setting register */
+#define WDTTIMSET			0x3004
+#define   WDTTIMSET_PERIOD_MASK		(0xf << 0)
+#define   WDTTIMSET_PERIOD_1_SEC	(0x3 << 0)
+
+/* WDT reset selection register */
+#define WDTRSTSEL			0x3008
+#define   WDTRSTSEL_RSTSEL_MASK		(0x3 << 0)
+#define   WDTRSTSEL_RSTSEL_BOTH		(0x0 << 0)
+#define   WDTRSTSEL_RSTSEL_IRQ_ONLY	(0x2 << 0)
+
+/* WDT control register */
+#define WDTCTRL				0x300c
+#define   WDTCTRL_STATUS		BIT(8)
+#define   WDTCTRL_CLEAR			BIT(1)
+#define   WDTCTRL_ENABLE		BIT(0)
+
+#define SEC_TO_WDTTIMSET_PRD(sec) \
+		(ilog2(sec) + WDTTIMSET_PERIOD_1_SEC)
+
+#define WDTST_TIMEOUT			1000 /* usec */
+
+#define WDT_DEFAULT_TIMEOUT		64   /* Default is 64 seconds */
+#define WDT_PERIOD_MIN			1
+#define WDT_PERIOD_MAX			128
+
+static unsigned int timeout;	/* module parameter; statics start at 0 */
+static bool nowayout = WATCHDOG_NOWAYOUT;
+
+struct uniphier_wdt_dev {
+	struct watchdog_device wdt_dev;	/* registered with the watchdog core */
+	struct regmap	*regmap;	/* syscon regmap of the parent node */
+};
+
+/*
+ * UniPhier Watchdog operations
+ */
+static int uniphier_watchdog_ping(struct watchdog_device *w)
+{
+	struct uniphier_wdt_dev *wdev = watchdog_get_drvdata(w);
+	unsigned int val;
+	int ret;
+
+	/* Clear counter: set only the WDTCTRL_CLEAR bit */
+	ret = regmap_write_bits(wdev->regmap, WDTCTRL,
+				WDTCTRL_CLEAR, WDTCTRL_CLEAR);
+	if (!ret)
+		/*
+		 * Per the SoC specification, after clearing the counter
+		 * we must wait until the counter status bit reads 1.
+		 */
+		ret = regmap_read_poll_timeout(wdev->regmap, WDTCTRL, val,
+					       (val & WDTCTRL_STATUS),
+					       0, WDTST_TIMEOUT);
+
+	return ret;	/* 0, or the error from the write or the poll */
+}
+
+static int __uniphier_watchdog_start(struct regmap *regmap, unsigned int sec)
+{
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read_poll_timeout(regmap, WDTCTRL, val,
+				       !(val & WDTCTRL_STATUS),
+				       0, WDTST_TIMEOUT);
+	if (ret)
+		return ret;	/* poll for STATUS == 0 failed */
+
+	/* Setup period; sec is encoded as ilog2(sec) + the 1 s code */
+	ret = regmap_write(regmap, WDTTIMSET,
+			   SEC_TO_WDTTIMSET_PRD(sec));
+	if (ret)
+		return ret;
+
+	/* Enable and clear watchdog */
+	ret = regmap_write(regmap, WDTCTRL, WDTCTRL_ENABLE | WDTCTRL_CLEAR);
+	if (!ret)
+		/*
+		 * Per the SoC specification, after clearing the counter
+		 * we must wait until the counter status bit reads 1.
+		 */
+		ret = regmap_read_poll_timeout(regmap, WDTCTRL, val,
+					       (val & WDTCTRL_STATUS),
+					       0, WDTST_TIMEOUT);
+
+	return ret;
+}
+
+static int __uniphier_watchdog_stop(struct regmap *regmap)
+{
+	/* Disable and stop watchdog: clear only the WDTCTRL_ENABLE bit */
+	return regmap_write_bits(regmap, WDTCTRL, WDTCTRL_ENABLE, 0);
+}
+
+static int __uniphier_watchdog_restart(struct regmap *regmap, unsigned int sec)
+{
+	int ret;
+
+	ret = __uniphier_watchdog_stop(regmap);	/* stop before reprogramming */
+	if (ret)
+		return ret;
+
+	return __uniphier_watchdog_start(regmap, sec);	/* new period */
+}
+
+static int uniphier_watchdog_start(struct watchdog_device *w)
+{
+	struct uniphier_wdt_dev *wdev = watchdog_get_drvdata(w);
+	unsigned int tmp_timeout;
+
+	tmp_timeout = roundup_pow_of_two(w->timeout); /* period is 2^n sec */
+
+	return __uniphier_watchdog_start(wdev->regmap, tmp_timeout);
+}
+
+static int uniphier_watchdog_stop(struct watchdog_device *w)
+{
+	struct uniphier_wdt_dev *wdev = watchdog_get_drvdata(w);
+
+	return __uniphier_watchdog_stop(wdev->regmap);	/* clear ENABLE */
+}
+
+static int uniphier_watchdog_set_timeout(struct watchdog_device *w,
+					 unsigned int t)
+{
+	struct uniphier_wdt_dev *wdev = watchdog_get_drvdata(w);
+	unsigned int tmp_timeout;
+	int ret;
+
+	tmp_timeout = roundup_pow_of_two(t);	/* period is 2^n seconds */
+	if (tmp_timeout == w->timeout)
+		return 0;	/* nothing to change */
+
+	if (watchdog_active(w)) {
+		ret = __uniphier_watchdog_restart(wdev->regmap, tmp_timeout);
+		if (ret)
+			return ret;	/* w->timeout is left unchanged */
+	}
+
+	w->timeout = tmp_timeout;	/* record the rounded-up value */
+
+	return 0;
+}
+
+/*
+ * Kernel Interfaces
+ */
+static const struct watchdog_info uniphier_wdt_info = {
+	.identity	= "uniphier-wdt",
+	.options	= WDIOF_SETTIMEOUT |
+			  WDIOF_KEEPALIVEPING |
+			  WDIOF_MAGICCLOSE |
+			  WDIOF_OVERHEAT, /* NOTE(review): no overheat handling visible here - confirm */
+};
+
+static const struct watchdog_ops uniphier_wdt_ops = {
+	.owner		= THIS_MODULE,
+	.start		= uniphier_watchdog_start,
+	.stop		= uniphier_watchdog_stop,
+	.ping		= uniphier_watchdog_ping,
+	.set_timeout	= uniphier_watchdog_set_timeout,
+};
+
+/* Look up the parent syscon regmap and register the watchdog device. */
+static int uniphier_wdt_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct uniphier_wdt_dev *wdev;
+	struct regmap *regmap;
+	struct device_node *parent;
+	int ret;
+
+	wdev = devm_kzalloc(dev, sizeof(*wdev), GFP_KERNEL);
+	if (!wdev)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, wdev);
+
+	parent = of_get_parent(dev->of_node); /* parent should be syscon node */
+	regmap = syscon_node_to_regmap(parent);
+	of_node_put(parent);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	wdev->regmap = regmap;
+	wdev->wdt_dev.info = &uniphier_wdt_info;
+	wdev->wdt_dev.ops = &uniphier_wdt_ops;
+	wdev->wdt_dev.max_timeout = WDT_PERIOD_MAX;
+	wdev->wdt_dev.min_timeout = WDT_PERIOD_MIN;
+	wdev->wdt_dev.parent = dev;
+
+	if (watchdog_init_timeout(&wdev->wdt_dev, timeout, dev) < 0)
+		wdev->wdt_dev.timeout = WDT_DEFAULT_TIMEOUT;
+	watchdog_set_nowayout(&wdev->wdt_dev, nowayout);
+	watchdog_stop_on_reboot(&wdev->wdt_dev);
+
+	watchdog_set_drvdata(&wdev->wdt_dev, wdev);
+
+	uniphier_watchdog_stop(&wdev->wdt_dev); /* NOTE(review): result ignored */
+	ret = regmap_write(wdev->regmap, WDTRSTSEL, WDTRSTSEL_RSTSEL_BOTH);
+	if (ret)
+		return ret;
+
+	ret = devm_watchdog_register_device(dev, &wdev->wdt_dev);
+	if (ret)
+		return ret;
+
+	dev_info(dev, "watchdog driver (timeout=%u sec, nowayout=%d)\n",
+		 wdev->wdt_dev.timeout, nowayout);
+
+	return 0;
+}
+
+static const struct of_device_id uniphier_wdt_dt_ids[] = {
+	{ .compatible = "socionext,uniphier-wdt" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, uniphier_wdt_dt_ids);
+
+static struct platform_driver uniphier_wdt_driver = {
+	.probe		= uniphier_wdt_probe,
+	.driver		= {
+		.name		= "uniphier-wdt",
+		.of_match_table	= uniphier_wdt_dt_ids,
+	},
+};
+
+module_platform_driver(uniphier_wdt_driver);
+
+module_param(timeout, uint, 0000);
+MODULE_PARM_DESC(timeout,
+	"Watchdog timeout seconds in power of 2. (0 < timeout <= 128, default="
+				__MODULE_STRING(WDT_DEFAULT_TIMEOUT) ")");
+
+module_param(nowayout, bool, 0000);
+MODULE_PARM_DESC(nowayout,
+	"Watchdog cannot be stopped once started (default="
+				__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+MODULE_AUTHOR("Keiji Hayashibara <hayashibara.keiji@socionext.com>");
+MODULE_DESCRIPTION("UniPhier Watchdog Device Driver");
+MODULE_LICENSE("GPL v2");

diff --git a/drivers/watchdog/w83627hf_wdt.c b/drivers/watchdog/w83627hf_wdt.c
index 98fd186..d9ba049 100644
--- a/drivers/watchdog/w83627hf_wdt.c
+++ b/drivers/watchdog/w83627hf_wdt.c

@@ -49,7 +49,8 @@ static int cr_wdt_csr;		/* WDT control & status register */
 
 enum chips { w83627hf, w83627s, w83697hf, w83697ug, w83637hf, w83627thf,
 	     w83687thf, w83627ehf, w83627dhg, w83627uhg, w83667hg, w83627dhg_p,
-	     w83667hg_b, nct6775, nct6776, nct6779, nct6791, nct6792, nct6102 };
+	     w83667hg_b, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793,
+	     nct6795, nct6102 };
 
 static int timeout;			/* in seconds */
 module_param(timeout, int, 0);
@@ -97,6 +98,8 @@ MODULE_PARM_DESC(early_disable, "Disable watchdog at boot time (default=0)");
 #define NCT6779_ID		0xc5
 #define NCT6791_ID		0xc8
 #define NCT6792_ID		0xc9
+#define NCT6793_ID		0xd1
+#define NCT6795_ID		0xd3
 
 #define W83627HF_WDT_TIMEOUT	0xf6
 #define W83697HF_WDT_TIMEOUT	0xf4
@@ -204,6 +207,8 @@ static int w83627hf_init(struct watchdog_device *wdog, enum chips chip)
 	case nct6779:
 	case nct6791:
 	case nct6792:
+	case nct6793:
+	case nct6795:
 	case nct6102:
 		/*
 		 * These chips have a fixed WDTO# output pin (W83627UHG),
@@ -396,6 +401,12 @@ static int wdt_find(int addr)
 	case NCT6792_ID:
 		ret = nct6792;
 		break;
+	case NCT6793_ID:
+		ret = nct6793;
+		break;
+	case NCT6795_ID:
+		ret = nct6795;
+		break;
 	case NCT6102_ID:
 		ret = nct6102;
 		cr_wdt_timeout = NCT6102D_WDT_TIMEOUT;
@@ -437,6 +448,8 @@ static int __init wdt_init(void)
 		"NCT6779",
 		"NCT6791",
 		"NCT6792",
+		"NCT6793",
+		"NCT6795",
 		"NCT6102",
 	};
 

diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index d5d2bbd..0826e66 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c

@@ -80,6 +80,9 @@ static struct watchdog_core_data *old_wd_data;
 
 static struct workqueue_struct *watchdog_wq;
 
+static bool handle_boot_enabled =
+	IS_ENABLED(CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED);
+
 static inline bool watchdog_need_worker(struct watchdog_device *wdd)
 {
 	/* All variables in milli-seconds */
@@ -192,18 +195,23 @@ static int watchdog_ping(struct watchdog_device *wdd)
 	return __watchdog_ping(wdd);
 }
 
+static bool watchdog_worker_should_ping(struct watchdog_core_data *wd_data)
+{
+	struct watchdog_device *wdd = wd_data->wdd;	/* NULL-checked below */
+
+	return wdd && (watchdog_active(wdd) || watchdog_hw_running(wdd));
+}
+
 static void watchdog_ping_work(struct work_struct *work)
 {
 	struct watchdog_core_data *wd_data;
-	struct watchdog_device *wdd;
 
 	wd_data = container_of(to_delayed_work(work), struct watchdog_core_data,
 			       work);
 
 	mutex_lock(&wd_data->lock);
-	wdd = wd_data->wdd;
-	if (wdd && (watchdog_active(wdd) || watchdog_hw_running(wdd)))
-		__watchdog_ping(wdd);
+	if (watchdog_worker_should_ping(wd_data))
+		__watchdog_ping(wd_data->wdd);
 	mutex_unlock(&wd_data->lock);
 }
 
@@ -956,9 +964,14 @@ static int watchdog_cdev_register(struct watchdog_device *wdd, dev_t devno)
 	 * and schedule an immediate ping.
 	 */
 	if (watchdog_hw_running(wdd)) {
-		__module_get(wdd->ops->owner);
-		kref_get(&wd_data->kref);
-		queue_delayed_work(watchdog_wq, &wd_data->work, 0);
+		if (handle_boot_enabled) {
+			__module_get(wdd->ops->owner);
+			kref_get(&wd_data->kref);
+			queue_delayed_work(watchdog_wq, &wd_data->work, 0);
+		} else {
+			pr_info("watchdog%d running and kernel based pre-userspace handler disabled\n",
+					wdd->id);
+		}
 	}
 
 	return 0;
@@ -1106,3 +1119,8 @@ void __exit watchdog_dev_exit(void)
 	class_unregister(&watchdog_class);
 	destroy_workqueue(watchdog_wq);
 }
+
+module_param(handle_boot_enabled, bool, 0444);
+MODULE_PARM_DESC(handle_boot_enabled,
+	"Watchdog core auto-updates boot enabled watchdogs before userspace takes over (default="
+	__MODULE_STRING(IS_ENABLED(CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED)) ")");

diff --git a/drivers/watchdog/zx2967_wdt.c b/drivers/watchdog/zx2967_wdt.c
index c982527..69ec585 100644
--- a/drivers/watchdog/zx2967_wdt.c
+++ b/drivers/watchdog/zx2967_wdt.c

@@ -154,7 +154,7 @@ static const struct watchdog_info zx2967_wdt_ident = {
 	.identity         =	"zx2967 watchdog",
 };
 
-static struct watchdog_ops zx2967_wdt_ops = {
+static const struct watchdog_ops zx2967_wdt_ops = {
 	.owner = THIS_MODULE,
 	.start = zx2967_wdt_start,
 	.stop = zx2967_wdt_stop,

diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index d6950e0..7bc88fd 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c

@@ -134,11 +134,8 @@ struct vscsibk_pend {
 	struct page *pages[VSCSI_MAX_GRANTS];
 
 	struct se_cmd se_cmd;
-};
 
-struct scsiback_tmr {
-	atomic_t tmr_complete;
-	wait_queue_head_t tmr_wait;
+	struct completion tmr_done;
 };
 
 #define VSCSI_DEFAULT_SESSION_TAGS	128
@@ -599,36 +596,28 @@ static void scsiback_device_action(struct vscsibk_pend *pending_req,
 	struct scsiback_tpg *tpg = pending_req->v2p->tpg;
 	struct scsiback_nexus *nexus = tpg->tpg_nexus;
 	struct se_cmd *se_cmd = &pending_req->se_cmd;
-	struct scsiback_tmr *tmr;
 	u64 unpacked_lun = pending_req->v2p->lun;
 	int rc, err = FAILED;
 
-	tmr = kzalloc(sizeof(struct scsiback_tmr), GFP_KERNEL);
-	if (!tmr) {
-		target_put_sess_cmd(se_cmd);
-		goto err;
-	}
-
-	init_waitqueue_head(&tmr->tmr_wait);
+	init_completion(&pending_req->tmr_done);
 
 	rc = target_submit_tmr(&pending_req->se_cmd, nexus->tvn_se_sess,
 			       &pending_req->sense_buffer[0],
-			       unpacked_lun, tmr, act, GFP_KERNEL,
+			       unpacked_lun, NULL, act, GFP_KERNEL,
 			       tag, TARGET_SCF_ACK_KREF);
 	if (rc)
 		goto err;
 
-	wait_event(tmr->tmr_wait, atomic_read(&tmr->tmr_complete));
+	wait_for_completion(&pending_req->tmr_done);
 
 	err = (se_cmd->se_tmr_req->response == TMR_FUNCTION_COMPLETE) ?
 		SUCCESS : FAILED;
 
 	scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
-	transport_generic_free_cmd(&pending_req->se_cmd, 1);
+	transport_generic_free_cmd(&pending_req->se_cmd, 0);
 	return;
+
 err:
-	if (tmr)
-		kfree(tmr);
 	scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
 }
 
@@ -1389,12 +1378,6 @@ static int scsiback_check_stop_free(struct se_cmd *se_cmd)
 static void scsiback_release_cmd(struct se_cmd *se_cmd)
 {
 	struct se_session *se_sess = se_cmd->se_sess;
-	struct se_tmr_req *se_tmr = se_cmd->se_tmr_req;
-
-	if (se_tmr && se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) {
-		struct scsiback_tmr *tmr = se_tmr->fabric_tmr_ptr;
-		kfree(tmr);
-	}
 
 	percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
 }
@@ -1455,11 +1438,10 @@ static int scsiback_queue_status(struct se_cmd *se_cmd)
 
 static void scsiback_queue_tm_rsp(struct se_cmd *se_cmd)
 {
-	struct se_tmr_req *se_tmr = se_cmd->se_tmr_req;
-	struct scsiback_tmr *tmr = se_tmr->fabric_tmr_ptr;
+	struct vscsibk_pend *pending_req = container_of(se_cmd,
+				struct vscsibk_pend, se_cmd);
 
-	atomic_set(&tmr->tmr_complete, 1);
-	wake_up(&tmr->tmr_wait);
+	complete(&pending_req->tmr_done);
 }
 
 static void scsiback_aborted_task(struct se_cmd *se_cmd)

diff --git a/fs/Kconfig b/fs/Kconfig
index b0e42b6..7aee6d6 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig

@@ -80,7 +80,6 @@
 config FILE_LOCKING
 	bool "Enable POSIX file locking API" if EXPERT
 	default y
-	select PERCPU_RWSEM
 	help
 	  This option enables standard file locking support, required
           for filesystems like NFS and for the flock() system

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 974f534..beef981 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h

@@ -83,7 +83,7 @@ struct autofs_info {
 struct autofs_wait_queue {
 	wait_queue_head_t queue;
 	struct autofs_wait_queue *next;
-	autofs_wqt_t wait_queue_entry_token;
+	autofs_wqt_t wait_queue_token;
 	/* We use the following to see what we are waiting for */
 	struct qstr name;
 	u32 dev;

diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 7071895..24a58bf 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c

@@ -104,7 +104,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 	size_t pktsz;
 
 	pr_debug("wait id = 0x%08lx, name = %.*s, type=%d\n",
-		 (unsigned long) wq->wait_queue_entry_token,
+		 (unsigned long) wq->wait_queue_token,
 		 wq->name.len, wq->name.name, type);
 
 	memset(&pkt, 0, sizeof(pkt)); /* For security reasons */
@@ -120,7 +120,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 		pktsz = sizeof(*mp);
 
-		mp->wait_queue_entry_token = wq->wait_queue_entry_token;
+		mp->wait_queue_token = wq->wait_queue_token;
 		mp->len = wq->name.len;
 		memcpy(mp->name, wq->name.name, wq->name.len);
 		mp->name[wq->name.len] = '\0';
@@ -133,7 +133,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 		pktsz = sizeof(*ep);
 
-		ep->wait_queue_entry_token = wq->wait_queue_entry_token;
+		ep->wait_queue_token = wq->wait_queue_token;
 		ep->len = wq->name.len;
 		memcpy(ep->name, wq->name.name, wq->name.len);
 		ep->name[wq->name.len] = '\0';
@@ -153,7 +153,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 
 		pktsz = sizeof(*packet);
 
-		packet->wait_queue_entry_token = wq->wait_queue_entry_token;
+		packet->wait_queue_token = wq->wait_queue_token;
 		packet->len = wq->name.len;
 		memcpy(packet->name, wq->name.name, wq->name.len);
 		packet->name[wq->name.len] = '\0';
@@ -428,7 +428,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 			return -ENOMEM;
 		}
 
-		wq->wait_queue_entry_token = autofs4_next_wait_queue;
+		wq->wait_queue_token = autofs4_next_wait_queue;
 		if (++autofs4_next_wait_queue == 0)
 			autofs4_next_wait_queue = 1;
 		wq->next = sbi->queues;
@@ -461,7 +461,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 		}
 
 		pr_debug("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
-			 (unsigned long) wq->wait_queue_entry_token, wq->name.len,
+			 (unsigned long) wq->wait_queue_token, wq->name.len,
 			 wq->name.name, notify);
 
 		/*
@@ -471,7 +471,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 	} else {
 		wq->wait_ctr++;
 		pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n",
-			 (unsigned long) wq->wait_queue_entry_token, wq->name.len,
+			 (unsigned long) wq->wait_queue_token, wq->name.len,
 			 wq->name.name, notify);
 		mutex_unlock(&sbi->wq_mutex);
 		kfree(qstr.name);
@@ -550,13 +550,13 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 }
 
 
-int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_entry_token, int status)
+int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_token, int status)
 {
 	struct autofs_wait_queue *wq, **wql;
 
 	mutex_lock(&sbi->wq_mutex);
 	for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
-		if (wq->wait_queue_entry_token == wait_queue_entry_token)
+		if (wq->wait_queue_token == wait_queue_token)
 			break;
 	}
 

diff --git a/fs/befs/btree.c b/fs/befs/btree.c
index d509887..1b7e0f7 100644
--- a/fs/befs/btree.c
+++ b/fs/befs/btree.c

@@ -120,18 +120,15 @@ static int befs_compare_strings(const void *key1, int keylen1,
 				const void *key2, int keylen2);
 
 /**
- * befs_bt_read_super - read in btree superblock convert to cpu byteorder
- * @sb: Filesystem superblock
- * @ds: Datastream to read from
- * @sup: Buffer in which to place the btree superblock
+ * befs_bt_read_super() - read in btree superblock convert to cpu byteorder
+ * @sb:        Filesystem superblock
+ * @ds:        Datastream to read from
+ * @sup:       Buffer in which to place the btree superblock
  *
  * Calls befs_read_datastream to read in the btree superblock and
  * makes sure it is in cpu byteorder, byteswapping if necessary.
- *
- * On success, returns BEFS_OK and *@sup contains the btree superblock,
- * in cpu byte order.
- *
- * On failure, BEFS_ERR is returned.
+ * Return: BEFS_OK on success, in which case *@sup contains the btree
+ * superblock in cpu byte order. Otherwise BEFS_ERR is returned.
  */
 static int
 befs_bt_read_super(struct super_block *sb, const befs_data_stream *ds,

diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 25e312c..9a69392 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c

@@ -419,7 +419,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 		if (i_sblock > info->si_blocks ||
 			i_eblock > info->si_blocks ||
 			i_sblock > i_eblock ||
-			i_eoff > s_size ||
+			(i_eoff != le32_to_cpu(-1) && i_eoff > s_size) ||
 			i_sblock * BFS_BSIZE > i_eoff) {
 
 			printf("Inode 0x%08x corrupted\n", i);

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 5075fd5..879ff9c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c

@@ -163,8 +163,6 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	unsigned long p = bprm->p;
 	int argc = bprm->argc;
 	int envc = bprm->envc;
-	elf_addr_t __user *argv;
-	elf_addr_t __user *envp;
 	elf_addr_t __user *sp;
 	elf_addr_t __user *u_platform;
 	elf_addr_t __user *u_base_platform;
@@ -304,38 +302,38 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
 	if (__put_user(argc, sp++))
 		return -EFAULT;
-	argv = sp;
-	envp = argv + argc + 1;
 
-	/* Populate argv and envp */
+	/* Populate list of argv pointers back to argv strings. */
 	p = current->mm->arg_end = current->mm->arg_start;
 	while (argc-- > 0) {
 		size_t len;
-		if (__put_user((elf_addr_t)p, argv++))
+		if (__put_user((elf_addr_t)p, sp++))
 			return -EFAULT;
 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
 		if (!len || len > MAX_ARG_STRLEN)
 			return -EINVAL;
 		p += len;
 	}
-	if (__put_user(0, argv))
+	if (__put_user(0, sp++))
 		return -EFAULT;
-	current->mm->arg_end = current->mm->env_start = p;
+	current->mm->arg_end = p;
+
+	/* Populate list of envp pointers back to envp strings. */
+	current->mm->env_end = current->mm->env_start = p;
 	while (envc-- > 0) {
 		size_t len;
-		if (__put_user((elf_addr_t)p, envp++))
+		if (__put_user((elf_addr_t)p, sp++))
 			return -EFAULT;
 		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
 		if (!len || len > MAX_ARG_STRLEN)
 			return -EINVAL;
 		p += len;
 	}
-	if (__put_user(0, envp))
+	if (__put_user(0, sp++))
 		return -EFAULT;
 	current->mm->env_end = p;
 
 	/* Put the elf_info on the stack in the right place.  */
-	sp = (elf_addr_t __user *)envp + 1;
 	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
 		return -EFAULT;
 	return 0;
@@ -927,17 +925,60 @@ static int load_elf_binary(struct linux_binprm *bprm)
 		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
 
 		vaddr = elf_ppnt->p_vaddr;
+		/*
+		 * If we are loading ET_EXEC or we have already performed
+		 * the ET_DYN load_addr calculations, proceed normally.
+		 */
 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
 			elf_flags |= MAP_FIXED;
 		} else if (loc->elf_ex.e_type == ET_DYN) {
-			/* Try and get dynamic programs out of the way of the
-			 * default mmap base, as well as whatever program they
-			 * might try to exec.  This is because the brk will
-			 * follow the loader, and is not movable.  */
-			load_bias = ELF_ET_DYN_BASE - vaddr;
-			if (current->flags & PF_RANDOMIZE)
-				load_bias += arch_mmap_rnd();
-			load_bias = ELF_PAGESTART(load_bias);
+			/*
+			 * This logic is run once for the first LOAD Program
+			 * Header for ET_DYN binaries to calculate the
+			 * randomization (load_bias) for all the LOAD
+			 * Program Headers, and to calculate the entire
+			 * size of the ELF mapping (total_size). (Note that
+			 * load_addr_set is set to true later once the
+			 * initial mapping is performed.)
+			 *
+			 * There are effectively two types of ET_DYN
+			 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
+			 * and loaders (ET_DYN without INTERP, since they
+			 * _are_ the ELF interpreter). The loaders must
+			 * be loaded away from programs since the program
+			 * may otherwise collide with the loader (especially
+			 * for ET_EXEC which does not have a randomized
+			 * position). For example to handle invocations of
+			 * "./ld.so someprog" to test out a new version of
+			 * the loader, the subsequent program that the
+			 * loader loads must avoid the loader itself, so
+			 * they cannot share the same load range. Sufficient
+			 * room for the brk must be allocated with the
+			 * loader as well, since brk must be available with
+			 * the loader.
+			 *
+			 * Therefore, programs are loaded offset from
+			 * ELF_ET_DYN_BASE and loaders are loaded into the
+			 * independently randomized mmap region (0 load_bias
+			 * without MAP_FIXED).
+			 */
+			if (elf_interpreter) {
+				load_bias = ELF_ET_DYN_BASE;
+				if (current->flags & PF_RANDOMIZE)
+					load_bias += arch_mmap_rnd();
+				elf_flags |= MAP_FIXED;
+			} else
+				load_bias = 0;
+
+			/*
+			 * Since load_bias is used for all subsequent loading
+			 * calculations, we must lower it by the first vaddr
+			 * so that the remaining calculations based on the
+			 * ELF vaddrs will be correctly offset. The result
+			 * is then page aligned.
+			 */
+			load_bias = ELF_PAGESTART(load_bias - vaddr);
+
 			total_size = total_mapping_size(elf_phdata,
 							loc->elf_ex.e_phnum);
 			if (!total_size) {

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a85d790..9e75d8a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c

@@ -1881,16 +1881,25 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 	ssize_t num_written = 0;
 	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
 	ssize_t err;
-	loff_t pos = iocb->ki_pos;
+	loff_t pos;
 	size_t count = iov_iter_count(from);
 	loff_t oldsize;
 	int clean_page = 0;
 
-	if ((iocb->ki_flags & IOCB_NOWAIT) &&
-			(iocb->ki_flags & IOCB_DIRECT)) {
-		/* Don't sleep on inode rwsem */
-		if (!inode_trylock(inode))
+	if (!inode_trylock(inode)) {
+		if (iocb->ki_flags & IOCB_NOWAIT)
 			return -EAGAIN;
+		inode_lock(inode);
+	}
+
+	err = generic_write_checks(iocb, from);
+	if (err <= 0) {
+		inode_unlock(inode);
+		return err;
+	}
+
+	pos = iocb->ki_pos;
+	if (iocb->ki_flags & IOCB_NOWAIT) {
 		/*
 		 * We will allocate space in case nodatacow is not set,
 		 * so bail
@@ -1901,13 +1910,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 			inode_unlock(inode);
 			return -EAGAIN;
 		}
-	} else
-		inode_lock(inode);
-
-	err = generic_write_checks(iocb, from);
-	if (err <= 0) {
-		inode_unlock(inode);
-		return err;
 	}
 
 	current->backing_dev_info = inode_to_bdi(inode);

diff --git a/fs/buffer.c b/fs/buffer.c
index 233e2983..5715dac 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c

@@ -1281,44 +1281,31 @@ static inline void check_irqs_on(void)
 }
 
 /*
- * The LRU management algorithm is dopey-but-simple.  Sorry.
+ * Install a buffer_head into this cpu's LRU.  If not already in the LRU, it is
+ * inserted at the front, and the buffer_head at the back if any is evicted.
+ * Or, if already in the LRU it is moved to the front.
  */
 static void bh_lru_install(struct buffer_head *bh)
 {
-	struct buffer_head *evictee = NULL;
+	struct buffer_head *evictee = bh;
+	struct bh_lru *b;
+	int i;
 
 	check_irqs_on();
 	bh_lru_lock();
-	if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
-		struct buffer_head *bhs[BH_LRU_SIZE];
-		int in;
-		int out = 0;
 
-		get_bh(bh);
-		bhs[out++] = bh;
-		for (in = 0; in < BH_LRU_SIZE; in++) {
-			struct buffer_head *bh2 =
-				__this_cpu_read(bh_lrus.bhs[in]);
-
-			if (bh2 == bh) {
-				__brelse(bh2);
-			} else {
-				if (out >= BH_LRU_SIZE) {
-					BUG_ON(evictee != NULL);
-					evictee = bh2;
-				} else {
-					bhs[out++] = bh2;
-				}
-			}
+	b = this_cpu_ptr(&bh_lrus);
+	for (i = 0; i < BH_LRU_SIZE; i++) {
+		swap(evictee, b->bhs[i]);
+		if (evictee == bh) {
+			bh_lru_unlock();
+			return;
 		}
-		while (out < BH_LRU_SIZE)
-			bhs[out++] = NULL;
-		memcpy(this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
 	}
-	bh_lru_unlock();
 
-	if (evictee)
-		__brelse(evictee);
+	get_bh(bh);
+	bh_lru_unlock();
+	brelse(evictee);
 }
 
 /*
@@ -3501,6 +3488,130 @@ int bh_submit_read(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(bh_submit_read);
 
+/*
+ * Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff.
+ *
+ * Returns the offset within the file on success, and -ENOENT otherwise.
+ */
+static loff_t
+page_seek_hole_data(struct page *page, loff_t lastoff, int whence)
+{
+	loff_t offset = page_offset(page);
+	struct buffer_head *bh, *head;
+	bool seek_data = whence == SEEK_DATA;
+
+	if (lastoff < offset)
+		lastoff = offset;
+
+	bh = head = page_buffers(page);
+	do {
+		offset += bh->b_size;
+		if (lastoff >= offset)
+			continue;
+
+		/*
+		 * Unwritten extents that have data in the page cache covering
+		 * them can be identified by the BH_Unwritten state flag.
+		 * Pages with multiple buffers might have a mix of holes, data
+		 * and unwritten extents - any buffer with valid data in it
+		 * should have BH_Uptodate flag set on it.
+		 */
+
+		if ((buffer_unwritten(bh) || buffer_uptodate(bh)) == seek_data)
+			return lastoff;
+
+		lastoff = offset;
+	} while ((bh = bh->b_this_page) != head);
+	return -ENOENT;
+}
+
+/*
+ * Seek for SEEK_DATA / SEEK_HOLE in the page cache.
+ *
+ * Within unwritten extents, the page cache determines which parts are holes
+ * and which are data: unwritten and uptodate buffer heads count as data;
+ * everything else counts as a hole.
+ *
+ * Returns the resulting offset on success, and -ENOENT otherwise.
+ */
+loff_t
+page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
+			  int whence)
+{
+	pgoff_t index = offset >> PAGE_SHIFT;
+	pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE);
+	loff_t lastoff = offset;
+	struct pagevec pvec;
+
+	if (length <= 0)
+		return -ENOENT;
+
+	pagevec_init(&pvec, 0);
+
+	do {
+		unsigned want, nr_pages, i;
+
+		want = min_t(unsigned, end - index, PAGEVEC_SIZE);
+		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want);
+		if (nr_pages == 0)
+			break;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			/*
+			 * At this point, the page may be truncated or
+			 * invalidated (changing page->mapping to NULL), or
+			 * even swizzled back from swapper_space to tmpfs file
+			 * mapping.  However, page->index will not change
+			 * because we have a reference on the page.
+                         *
+			 * If current page offset is beyond where we've ended,
+			 * we've found a hole.
+                         */
+			if (whence == SEEK_HOLE &&
+			    lastoff < page_offset(page))
+				goto check_range;
+
+			/* Searching done if the page index is out of range. */
+			if (page->index >= end)
+				goto not_found;
+
+			lock_page(page);
+			if (likely(page->mapping == inode->i_mapping) &&
+			    page_has_buffers(page)) {
+				lastoff = page_seek_hole_data(page, lastoff, whence);
+				if (lastoff >= 0) {
+					unlock_page(page);
+					goto check_range;
+				}
+			}
+			unlock_page(page);
+			lastoff = page_offset(page) + PAGE_SIZE;
+		}
+
+		/* Searching done if fewer pages returned than wanted. */
+		if (nr_pages < want)
+			break;
+
+		index = pvec.pages[i - 1]->index + 1;
+		pagevec_release(&pvec);
+	} while (index < end);
+
+	/* When no page at lastoff and we are not done, we found a hole. */
+	if (whence != SEEK_HOLE)
+		goto not_found;
+
+check_range:
+	if (lastoff < offset + length)
+		goto out;
+not_found:
+	lastoff = -ENOENT;
+out:
+	pagevec_release(&pvec);
+	return lastoff;
+}
+
 void __init buffer_init(void)
 {
 	unsigned long nrpages;

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 1e71e6c..5083628 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c

@@ -530,14 +530,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	long writeback_stat;
 	u64 truncate_size;
 	u32 truncate_seq;
-	int err = 0, len = PAGE_SIZE;
+	int err, len = PAGE_SIZE;
 
 	dout("writepage %p idx %lu\n", page, page->index);
 
-	if (!page->mapping || !page->mapping->host) {
-		dout("writepage %p - no mapping\n", page);
-		return -EFAULT;
-	}
 	inode = page->mapping->host;
 	ci = ceph_inode(inode);
 	fsc = ceph_inode_to_client(inode);
@@ -547,7 +543,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	snapc = page_snap_context(page);
 	if (snapc == NULL) {
 		dout("writepage %p page %p not dirty?\n", inode, page);
-		goto out;
+		return 0;
 	}
 	oldest = get_oldest_context(inode, &snap_size,
 				    &truncate_size, &truncate_seq);
@@ -555,9 +551,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 		dout("writepage %p page %p snapc %p not writeable - noop\n",
 		     inode, page, snapc);
 		/* we should only noop if called by kswapd */
-		WARN_ON((current->flags & PF_MEMALLOC) == 0);
+		WARN_ON(!(current->flags & PF_MEMALLOC));
 		ceph_put_snap_context(oldest);
-		goto out;
+		redirty_page_for_writepage(wbc, page);
+		return 0;
 	}
 	ceph_put_snap_context(oldest);
 
@@ -567,8 +564,9 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	/* is this a partial page at end of file? */
 	if (page_off >= snap_size) {
 		dout("%p page eof %llu\n", page, snap_size);
-		goto out;
+		return 0;
 	}
+
 	if (snap_size < page_off + len)
 		len = snap_size - page_off;
 
@@ -595,7 +593,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 			dout("writepage interrupted page %p\n", page);
 			redirty_page_for_writepage(wbc, page);
 			end_page_writeback(page);
-			goto out;
+			return err;
 		}
 		dout("writepage setting page/mapping error %d %p\n",
 		     err, page);
@@ -611,7 +609,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	end_page_writeback(page);
 	ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
 	ceph_put_snap_context(snapc);  /* page's reference */
-out:
 	return err;
 }
 
@@ -1318,7 +1315,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
 			  struct page *page, void *fsdata)
 {
 	struct inode *inode = file_inode(file);
-	int check_cap = 0;
+	bool check_cap = false;
 
 	dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
 	     inode, page, (int)pos, (int)copied, (int)len);

diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 4e7421c..fd11728 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c

@@ -35,18 +35,34 @@ struct fscache_netfs ceph_cache_netfs = {
 	.version	= 0,
 };
 
+static DEFINE_MUTEX(ceph_fscache_lock);
+static LIST_HEAD(ceph_fscache_list);
+
+struct ceph_fscache_entry {
+	struct list_head list;
+	struct fscache_cookie *fscache;
+	struct ceph_fsid fsid;
+	size_t uniq_len;
+	char uniquifier[0];
+};
+
 static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
 					     void *buffer, uint16_t maxbuf)
 {
 	const struct ceph_fs_client* fsc = cookie_netfs_data;
-	uint16_t klen;
+	const char *fscache_uniq = fsc->mount_options->fscache_uniq;
+	uint16_t fsid_len, uniq_len;
 
-	klen = sizeof(fsc->client->fsid);
-	if (klen > maxbuf)
+	fsid_len = sizeof(fsc->client->fsid);
+	uniq_len = fscache_uniq ? strlen(fscache_uniq) : 0;
+	if (fsid_len + uniq_len > maxbuf)
 		return 0;
 
-	memcpy(buffer, &fsc->client->fsid, klen);
-	return klen;
+	memcpy(buffer, &fsc->client->fsid, fsid_len);
+	if (uniq_len)
+		memcpy(buffer + fsid_len, fscache_uniq, uniq_len);
+
+	return fsid_len + uniq_len;
 }
 
 static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
@@ -67,13 +83,54 @@ void ceph_fscache_unregister(void)
 
 int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
 {
+	const struct ceph_fsid *fsid = &fsc->client->fsid;
+	const char *fscache_uniq = fsc->mount_options->fscache_uniq;
+	size_t uniq_len = fscache_uniq ? strlen(fscache_uniq) : 0;
+	struct ceph_fscache_entry *ent;
+	int err = 0;
+
+	mutex_lock(&ceph_fscache_lock);
+	list_for_each_entry(ent, &ceph_fscache_list, list) {
+		if (memcmp(&ent->fsid, fsid, sizeof(*fsid)))
+			continue;
+		if (ent->uniq_len != uniq_len)
+			continue;
+		if (uniq_len && memcmp(ent->uniquifier, fscache_uniq, uniq_len))
+			continue;
+
+		pr_err("fscache cookie already registered for fsid %pU\n", fsid);
+		pr_err("  use fsc=%%s mount option to specify a uniquifier\n");
+		err = -EBUSY;
+		goto out_unlock;
+	}
+
+	ent = kzalloc(sizeof(*ent) + uniq_len, GFP_KERNEL);
+	if (!ent) {
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+
 	fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
 					      &ceph_fscache_fsid_object_def,
 					      fsc, true);
-	if (!fsc->fscache)
-		pr_err("Unable to register fsid: %p fscache cookie\n", fsc);
 
-	return 0;
+	if (fsc->fscache) {
+		memcpy(&ent->fsid, fsid, sizeof(*fsid));
+		if (uniq_len > 0) {
+			memcpy(&ent->uniquifier, fscache_uniq, uniq_len);
+			ent->uniq_len = uniq_len;
+		}
+		ent->fscache = fsc->fscache;
+		list_add_tail(&ent->list, &ceph_fscache_list);
+	} else {
+		kfree(ent);
+		pr_err("unable to register fscache cookie for fsid %pU\n",
+		       fsid);
+		/* all other fs ignore this error */
+	}
+out_unlock:
+	mutex_unlock(&ceph_fscache_lock);
+	return err;
 }
 
 static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
@@ -349,7 +406,24 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
 
 void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
 {
-	fscache_relinquish_cookie(fsc->fscache, 0);
+	if (fscache_cookie_valid(fsc->fscache)) {
+		struct ceph_fscache_entry *ent;
+		bool found = false;
+
+		mutex_lock(&ceph_fscache_lock);
+		list_for_each_entry(ent, &ceph_fscache_list, list) {
+			if (ent->fscache == fsc->fscache) {
+				list_del(&ent->list);
+				kfree(ent);
+				found = true;
+				break;
+			}
+		}
+		WARN_ON_ONCE(!found);
+		mutex_unlock(&ceph_fscache_lock);
+
+		__fscache_relinquish_cookie(fsc->fscache, 0);
+	}
 	fsc->fscache = NULL;
 }
 

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a3ebb63..7007ae2 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c

@@ -1653,6 +1653,21 @@ static int try_nonblocking_invalidate(struct inode *inode)
 	return -1;
 }
 
+bool __ceph_should_report_size(struct ceph_inode_info *ci)
+{
+	loff_t size = ci->vfs_inode.i_size;
+	/* mds will adjust max size according to the reported size */
+	if (ci->i_flushing_caps & CEPH_CAP_FILE_WR)
+		return false;
+	if (size >= ci->i_max_size)
+		return true;
+	/* half of previous max_size increment has been used */
+	if (ci->i_max_size > ci->i_reported_size &&
+	    (size << 1) >= ci->i_max_size + ci->i_reported_size)
+		return true;
+	return false;
+}
+
 /*
  * Swiss army knife function to examine currently used and wanted
  * versus held caps.  Release, flush, ack revoked caps to mds as
@@ -1806,8 +1821,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 			}
 
 			/* approaching file_max? */
-			if ((inode->i_size << 1) >= ci->i_max_size &&
-			    (ci->i_reported_size << 1) < ci->i_max_size) {
+			if (__ceph_should_report_size(ci)) {
 				dout("i_size approaching max_size\n");
 				goto ack;
 			}
@@ -3027,8 +3041,10 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
 					le32_to_cpu(grant->truncate_seq),
 					le64_to_cpu(grant->truncate_size),
 					size);
-		/* max size increase? */
-		if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
+	}
+
+	if (ci->i_auth_cap == cap && (newcaps & CEPH_CAP_ANY_FILE_WR)) {
+		if (max_size != ci->i_max_size) {
 			dout("max_size %lld -> %llu\n",
 			     ci->i_max_size, max_size);
 			ci->i_max_size = max_size;
@@ -3037,6 +3053,10 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
 				ci->i_requested_max_size = 0;
 			}
 			wake = true;
+		} else if (ci->i_wanted_max_size > ci->i_max_size &&
+			   ci->i_wanted_max_size > ci->i_requested_max_size) {
+			/* CEPH_CAP_OP_IMPORT */
+			wake = true;
 		}
 	}
 
@@ -3554,7 +3574,6 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
 	}
 
 	/* make sure we re-request max_size, if necessary */
-	ci->i_wanted_max_size = 0;
 	ci->i_requested_max_size = 0;
 
 	*old_issued = issued;
@@ -3790,6 +3809,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
  */
 void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
 {
+	struct inode *inode;
 	struct ceph_inode_info *ci;
 	int flags = CHECK_CAPS_NODELAY;
 
@@ -3805,9 +3825,15 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
 		    time_before(jiffies, ci->i_hold_caps_max))
 			break;
 		list_del_init(&ci->i_cap_delay_list);
+
+		inode = igrab(&ci->vfs_inode);
 		spin_unlock(&mdsc->cap_delay_lock);
-		dout("check_delayed_caps on %p\n", &ci->vfs_inode);
-		ceph_check_caps(ci, flags, NULL);
+
+		if (inode) {
+			dout("check_delayed_caps on %p\n", inode);
+			ceph_check_caps(ci, flags, NULL);
+			iput(inode);
+		}
 	}
 	spin_unlock(&mdsc->cap_delay_lock);
 }

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 29308a8..3d48c41 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c

@@ -1040,8 +1040,8 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
 	int num_pages;
 	int written = 0;
 	int flags;
-	int check_caps = 0;
 	int ret;
+	bool check_caps = false;
 	struct timespec mtime = current_time(inode);
 	size_t count = iov_iter_count(from);
 

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 4de6cdd..220dfd8 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c

@@ -1016,6 +1016,7 @@ static void update_dentry_lease(struct dentry *dentry,
 	long unsigned ttl = from_time + (duration * HZ) / 1000;
 	long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000;
 	struct inode *dir;
+	struct ceph_mds_session *old_lease_session = NULL;
 
 	/*
 	 * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that
@@ -1051,8 +1052,10 @@ static void update_dentry_lease(struct dentry *dentry,
 	    time_before(ttl, di->time))
 		goto out_unlock;  /* we already have a newer lease. */
 
-	if (di->lease_session && di->lease_session != session)
-		goto out_unlock;
+	if (di->lease_session && di->lease_session != session) {
+		old_lease_session = di->lease_session;
+		di->lease_session = NULL;
+	}
 
 	ceph_dentry_lru_touch(dentry);
 
@@ -1065,6 +1068,8 @@ static void update_dentry_lease(struct dentry *dentry,
 	di->time = ttl;
 out_unlock:
 	spin_unlock(&dentry->d_lock);
+	if (old_lease_session)
+		ceph_put_mds_session(old_lease_session);
 	return;
 }
 
@@ -1653,20 +1658,17 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 	return err;
 }
 
-int ceph_inode_set_size(struct inode *inode, loff_t size)
+bool ceph_inode_set_size(struct inode *inode, loff_t size)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	int ret = 0;
+	bool ret;
 
 	spin_lock(&ci->i_ceph_lock);
 	dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
 	i_size_write(inode, size);
 	inode->i_blocks = calc_inode_blocks(size);
 
-	/* tell the MDS if we are approaching max_size */
-	if ((size << 1) >= ci->i_max_size &&
-	    (ci->i_reported_size << 1) < ci->i_max_size)
-		ret = 1;
+	ret = __ceph_should_report_size(ci);
 
 	spin_unlock(&ci->i_ceph_lock);
 	return ret;

diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 6806dbe..64ae7447 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c

@@ -127,6 +127,29 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
 	dout("ceph_lock_wait_for_completion: request %llu was interrupted\n",
 	     req->r_tid);
 
+	mutex_lock(&mdsc->mutex);
+	if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
+		err = 0;
+	} else {
+		/*
+		 * ensure we aren't running concurrently with
+		 * ceph_fill_trace or ceph_readdir_prepopulate, which
+		 * rely on locks (dir mutex) held by our caller.
+		 */
+		mutex_lock(&req->r_fill_mutex);
+		req->r_err = err;
+		set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
+		mutex_unlock(&req->r_fill_mutex);
+
+		if (!req->r_session) {
+			// haven't sent the request
+			err = 0;
+		}
+	}
+	mutex_unlock(&mdsc->mutex);
+	if (!err)
+		return 0;
+
 	intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK,
 					    USE_AUTH_MDS);
 	if (IS_ERR(intr_req))
@@ -146,7 +169,7 @@ static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
 	if (err && err != -ERESTARTSYS)
 		return err;
 
-	wait_for_completion(&req->r_completion);
+	wait_for_completion_killable(&req->r_safe_completion);
 	return 0;
 }
 

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0c05df4..666a9f2 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c

@@ -3769,13 +3769,13 @@ static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
 void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
 {
 	struct ceph_mds_client *mdsc = fsc->mdsc;
-
 	dout("mdsc_destroy %p\n", mdsc);
-	ceph_mdsc_stop(mdsc);
 
 	/* flush out any connection work with references to us */
 	ceph_msgr_flush();
 
+	ceph_mdsc_stop(mdsc);
+
 	fsc->mdsc = NULL;
 	kfree(mdsc);
 	dout("mdsc_destroy %p done\n", mdsc);

diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 8d7918c..aa06a8c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c

@@ -121,6 +121,7 @@ enum {
 	/* int args above */
 	Opt_snapdirname,
 	Opt_mds_namespace,
+	Opt_fscache_uniq,
 	Opt_last_string,
 	/* string args above */
 	Opt_dirstat,
@@ -158,6 +159,7 @@ static match_table_t fsopt_tokens = {
 	/* int args above */
 	{Opt_snapdirname, "snapdirname=%s"},
 	{Opt_mds_namespace, "mds_namespace=%s"},
+	{Opt_fscache_uniq, "fsc=%s"},
 	/* string args above */
 	{Opt_dirstat, "dirstat"},
 	{Opt_nodirstat, "nodirstat"},
@@ -223,6 +225,14 @@ static int parse_fsopt_token(char *c, void *private)
 		if (!fsopt->mds_namespace)
 			return -ENOMEM;
 		break;
+	case Opt_fscache_uniq:
+		fsopt->fscache_uniq = kstrndup(argstr[0].from,
+					       argstr[0].to-argstr[0].from,
+					       GFP_KERNEL);
+		if (!fsopt->fscache_uniq)
+			return -ENOMEM;
+		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
+		break;
 		/* misc */
 	case Opt_wsize:
 		fsopt->wsize = intval;
@@ -317,6 +327,7 @@ static void destroy_mount_options(struct ceph_mount_options *args)
 	kfree(args->snapdir_name);
 	kfree(args->mds_namespace);
 	kfree(args->server_path);
+	kfree(args->fscache_uniq);
 	kfree(args);
 }
 
@@ -350,10 +361,12 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt,
 	ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace);
 	if (ret)
 		return ret;
-
 	ret = strcmp_null(fsopt1->server_path, fsopt2->server_path);
 	if (ret)
 		return ret;
+	ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq);
+	if (ret)
+		return ret;
 
 	return ceph_compare_options(new_opt, fsc->client);
 }
@@ -475,8 +488,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 		seq_puts(m, ",noasyncreaddir");
 	if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
 		seq_puts(m, ",nodcache");
-	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
-		seq_puts(m, ",fsc");
+	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
+		if (fsopt->fscache_uniq)
+			seq_printf(m, ",fsc=%s", fsopt->fscache_uniq);
+		else
+			seq_puts(m, ",fsc");
+	}
 	if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
 		seq_puts(m, ",nopoolperm");
 
@@ -597,18 +614,11 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 	if (!fsc->wb_pagevec_pool)
 		goto fail_trunc_wq;
 
-	/* setup fscache */
-	if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) &&
-	    (ceph_fscache_register_fs(fsc) != 0))
-		goto fail_fscache;
-
 	/* caps */
 	fsc->min_caps = fsopt->max_readdir;
 
 	return fsc;
 
-fail_fscache:
-	ceph_fscache_unregister_fs(fsc);
 fail_trunc_wq:
 	destroy_workqueue(fsc->trunc_wq);
 fail_pg_inv_wq:
@@ -626,8 +636,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
 {
 	dout("destroy_fs_client %p\n", fsc);
 
-	ceph_fscache_unregister_fs(fsc);
-
 	destroy_workqueue(fsc->wb_wq);
 	destroy_workqueue(fsc->pg_inv_wq);
 	destroy_workqueue(fsc->trunc_wq);
@@ -636,8 +644,6 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
 
 	destroy_mount_options(fsc->mount_options);
 
-	ceph_fs_debugfs_cleanup(fsc);
-
 	ceph_destroy_client(fsc->client);
 
 	kfree(fsc);
@@ -822,6 +828,13 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
 		if (err < 0)
 			goto out;
 
+		/* setup fscache */
+		if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
+			err = ceph_fscache_register_fs(fsc);
+			if (err < 0)
+				goto out;
+		}
+
 		if (!fsc->mount_options->server_path) {
 			path = "";
 			dout("mount opening path \\t\n");
@@ -1040,6 +1053,12 @@ static void ceph_kill_sb(struct super_block *s)
 
 	ceph_mdsc_pre_umount(fsc->mdsc);
 	generic_shutdown_super(s);
+
+	fsc->client->extra_mon_dispatch = NULL;
+	ceph_fs_debugfs_cleanup(fsc);
+
+	ceph_fscache_unregister_fs(fsc);
+
 	ceph_mdsc_destroy(fsc);
 
 	destroy_fs_client(fsc);

diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a973acd..f02a222 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h

@@ -73,6 +73,7 @@ struct ceph_mount_options {
 	char *snapdir_name;   /* default ".snap" */
 	char *mds_namespace;  /* default NULL */
 	char *server_path;    /* default  "/" */
+	char *fscache_uniq;   /* default NULL */
 };
 
 struct ceph_fs_client {
@@ -793,7 +794,7 @@ extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 
 extern int ceph_inode_holds_cap(struct inode *inode, int mask);
 
-extern int ceph_inode_set_size(struct inode *inode, loff_t size);
+extern bool ceph_inode_set_size(struct inode *inode, loff_t size);
 extern void __ceph_do_pending_vmtruncate(struct inode *inode);
 extern void ceph_queue_vmtruncate(struct inode *inode);
 
@@ -918,6 +919,7 @@ extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 				       struct ceph_snap_context *snapc);
 extern void ceph_flush_snaps(struct ceph_inode_info *ci,
 			     struct ceph_mds_session **psession);
+extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
 extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 			    struct ceph_mds_session *session);
 extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);

diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 75267cd..11263f1 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c

@@ -756,6 +756,9 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
 	/* let's see if a virtual xattr was requested */
 	vxattr = ceph_match_vxattr(inode, name);
 	if (vxattr) {
+		err = ceph_do_getattr(inode, 0, true);
+		if (err)
+			return err;
 		err = -ENODATA;
 		if (!(vxattr->exists_cb && !vxattr->exists_cb(ci)))
 			err = vxattr->getxattr_cb(ci, value, size);

diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index afeefe79..f724361 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig

@@ -1,5 +1,5 @@
 config CIFS
-	tristate "CIFS support (advanced network filesystem, SMBFS successor)"
+	tristate "SMB3 and CIFS support (advanced network filesystem)"
 	depends on INET
 	select NLS
 	select CRYPTO
@@ -10,28 +10,35 @@
 	select CRYPTO_ECB
 	select CRYPTO_DES
 	help
-	  This is the client VFS module for the Common Internet File System
-	  (CIFS) protocol which is the successor to the Server Message Block
-	  (SMB) protocol, the native file sharing mechanism for most early
-	  PC operating systems.  The CIFS protocol is fully supported by
-	  file servers such as Windows 2000 (including Windows 2003, Windows 2008,
-	  NT 4 and Windows XP) as well by Samba (which provides excellent CIFS
+	  This is the client VFS module for the SMB3 family of NAS protocols,
+	  as well as for earlier dialects such as SMB2.1, SMB2 and the
+	  Common Internet File System (CIFS) protocol.  CIFS was the successor
+	  to the original dialect, the Server Message Block (SMB) protocol, the
+	  native file sharing mechanism for most early PC operating systems.
+
+	  The SMB3 protocol is supported by most modern operating systems and
+	  NAS appliances (e.g. Samba, Windows 8, Windows 2012, MacOS).
+	  The older CIFS protocol was included in Windows NT4, 2000 and XP (and
+	  later) as well as by Samba (which provides excellent CIFS and SMB3
 	  server support for Linux and many other operating systems). Limited
-	  support for OS/2 and Windows ME and similar servers is provided as
-	  well.
+	  support for OS/2 and Windows ME and similar very old servers is
+	  provided as well.
 
-	  The module also provides optional support for the followon
-	  protocols for CIFS including SMB3, which enables
-	  useful performance and security features (see the description
-	  of CONFIG_CIFS_SMB2).
-
-	  The cifs module provides an advanced network file system
-	  client for mounting to CIFS compliant servers.  It includes
+	  The cifs module provides an advanced network file system client
+	  for mounting to SMB3 (and CIFS) compliant servers.  It includes
 	  support for DFS (hierarchical name space), secure per-user
 	  session establishment via Kerberos or NTLM or NTLMv2,
 	  safe distributed caching (oplock), optional packet
 	  signing, Unicode and other internationalization improvements.
-	  If you need to mount to Samba or Windows from this machine, say Y.
+
+	  In general, the default dialects, SMB3 and later, enable better
+	  performance, security and features, than would be possible with CIFS.
+	  Note that when mounting to Samba, due to the CIFS POSIX extensions,
+	  CIFS mounts can provide slightly better POSIX compatibility
+	  than SMB3 mounts. SMB2/SMB3 mount options are also
+	  slightly simpler (compared to CIFS) due to protocol improvements.
+
+	  If you need to mount to Samba, Macs or Windows from this machine, say Y.
 
 config CIFS_STATS
         bool "CIFS statistics"
@@ -89,7 +96,7 @@
 	  Enables an upcall mechanism for CIFS which accesses userspace helper
 	  utilities to provide SPNEGO packaged (RFC 4178) Kerberos tickets
 	  which are needed to mount to certain secure servers (for which more
-	  secure Kerberos authentication is required). If unsure, say N.
+	  secure Kerberos authentication is required). If unsure, say Y.
 
 config CIFS_XATTR
         bool "CIFS extended attributes"
@@ -105,7 +112,7 @@
           (used by some filesystems to store ACLs) is not supported at
           this time.
 
-          If unsure, say N.
+          If unsure, say Y.
 
 config CIFS_POSIX
         bool "CIFS POSIX Extensions"
@@ -125,7 +132,7 @@
 	  help
 	    Allows fetching CIFS/NTFS ACL from the server.  The DACL blob
 	    is handed over to the application/caller.  See the man
-	    page for getcifsacl for more information.
+	    page for getcifsacl for more information.  If unsure, say Y.
 
 config CIFS_DEBUG
 	bool "Enable CIFS debugging routines"
@@ -148,12 +155,13 @@
 
 config CIFS_DEBUG_DUMP_KEYS
 	bool "Dump encryption keys for offline decryption (Unsafe)"
-	depends on CIFS_DEBUG && CIFS_SMB2
+	depends on CIFS_DEBUG
 	help
 	   Enabling this will dump the encryption and decryption keys
 	   used to communicate on an encrypted share connection on the
 	   console. This allows Wireshark to decrypt and dissect
 	   encrypted network captures. Enable this carefully.
+	   If unsure, say N.
 
 config CIFS_DFS_UPCALL
 	  bool "DFS feature support"
@@ -166,7 +174,7 @@
 	    an upcall mechanism for CIFS which contacts userspace helper
 	    utilities to provide server name resolution (host names to
 	    IP addresses) which is needed for implicit mounts of DFS junction
-	    points. If unsure, say N.
+	    points. If unsure, say Y.
 
 config CIFS_NFSD_EXPORT
 	  bool "Allow nfsd to export CIFS file system"
@@ -174,38 +182,9 @@
 	  help
 	   Allows NFS server to export a CIFS mounted share (nfsd over cifs)
 
-config CIFS_SMB2
-	bool "SMB2 and SMB3 network file system support"
-	depends on CIFS
-	select KEYS
-	select FSCACHE
-	select DNS_RESOLVER
-	select CRYPTO_AES
-	select CRYPTO_SHA256
-	select CRYPTO_CMAC
-	select CRYPTO_AEAD2
-	select CRYPTO_CCM
-
-	help
-	  This enables support for the Server Message Block version 2
-	  family of protocols, including SMB3.  SMB3 support is
-	  enabled on mount by specifying "vers=3.0" in the mount
-	  options. These protocols are the successors to the popular
-	  CIFS and SMB network file sharing protocols. SMB3 is the
-	  native file sharing mechanism for the more recent
-	  versions of Windows (Windows 8 and Windows 2012 and
-	  later) and Samba server and many others support SMB3 well.
-	  In general SMB3 enables better performance, security
-	  and features, than would be possible with CIFS (Note that
-	  when mounting to Samba, due to the CIFS POSIX extensions,
-	  CIFS mounts can provide slightly better POSIX compatibility
-	  than SMB3 mounts do though). Note that SMB2/SMB3 mount
-	  options are also slightly simpler (compared to CIFS) due
-	  to protocol improvements.
-
 config CIFS_SMB311
 	bool "SMB3.1.1 network file system support (Experimental)"
-	depends on CIFS_SMB2
+	depends on CIFS
 
 	help
 	  This enables experimental support for the newest, SMB3.1.1, dialect.

diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index eed7eb0..5e853a3 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile

@@ -6,7 +6,9 @@
 cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
 	  link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
 	  cifs_unicode.o nterr.o cifsencrypt.o \
-	  readdir.o ioctl.o sess.o export.o smb1ops.o winucase.o
+	  readdir.o ioctl.o sess.o export.o smb1ops.o winucase.o \
+	  smb2ops.o smb2maperror.o smb2transport.o \
+	  smb2misc.o smb2pdu.o smb2inode.o smb2file.o
 
 cifs-$(CONFIG_CIFS_XATTR) += xattr.o
 cifs-$(CONFIG_CIFS_ACL) += cifsacl.o
@@ -16,6 +18,3 @@
 cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o
 
 cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
-
-cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o smb2transport.o \
-			    smb2misc.o smb2pdu.o smb2inode.o smb2file.o

diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index e0445e2..b380e08 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c

@@ -588,7 +588,6 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
 	return j;
 }
 
-#ifdef CONFIG_CIFS_SMB2
 /*
  * cifs_local_to_utf16_bytes - how long will a string be after conversion?
  * @from - pointer to input string
@@ -647,4 +646,3 @@ cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len,
 	*utf16_len = len;
 	return dst;
 }
-#endif /* CONFIG_CIFS_SMB2 */

diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 8a79a34..8360b74 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h

@@ -116,11 +116,9 @@ char *cifs_strndup_from_utf16(const char *src, const int maxlen,
 extern int cifsConvertToUTF16(__le16 *target, const char *source, int maxlen,
 			      const struct nls_table *cp, int mapChars);
 extern int cifs_remap(struct cifs_sb_info *cifs_sb);
-#ifdef CONFIG_CIFS_SMB2
 extern __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen,
 				     int *utf16_len, const struct nls_table *cp,
 				     int remap);
-#endif /* CONFIG_CIFS_SMB2 */
 #endif
 
 wchar_t cifs_toupper(wchar_t in);

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 9a1667e..556f480 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c

@@ -51,9 +51,7 @@
 #include <linux/key-type.h>
 #include "cifs_spnego.h"
 #include "fscache.h"
-#ifdef CONFIG_CIFS_SMB2
 #include "smb2pdu.h"
-#endif
 
 int cifsFYI = 0;
 bool traceSMB;
@@ -277,9 +275,8 @@ cifs_alloc_inode(struct super_block *sb)
 	cifs_inode->uniqueid = 0;
 	cifs_inode->createtime = 0;
 	cifs_inode->epoch = 0;
-#ifdef CONFIG_CIFS_SMB2
 	generate_random_uuid(cifs_inode->lease_key);
-#endif
+
 	/*
 	 * Can not set i_flags here - they get immediately overwritten to zero
 	 * by the VFS.
@@ -1213,14 +1210,12 @@ cifs_destroy_inodecache(void)
 static int
 cifs_init_request_bufs(void)
 {
-	size_t max_hdr_size = MAX_CIFS_HDR_SIZE;
-#ifdef CONFIG_CIFS_SMB2
 	/*
 	 * SMB2 maximum header size is bigger than CIFS one - no problems to
 	 * allocate some more bytes for CIFS.
 	 */
-	max_hdr_size = MAX_SMB2_HDR_SIZE;
-#endif
+	size_t max_hdr_size = MAX_SMB2_HDR_SIZE;
+
 	if (CIFSMaxBufSize < 8192) {
 	/* Buffer size can not be smaller than 2 * PATH_MAX since maximum
 	Unicode path name has to fit in any SMB/CIFS path based frames */
@@ -1476,12 +1471,10 @@ MODULE_SOFTDEP("pre: hmac");
 MODULE_SOFTDEP("pre: md4");
 MODULE_SOFTDEP("pre: md5");
 MODULE_SOFTDEP("pre: nls");
-#ifdef CONFIG_CIFS_SMB2
 MODULE_SOFTDEP("pre: aes");
 MODULE_SOFTDEP("pre: cmac");
 MODULE_SOFTDEP("pre: sha256");
 MODULE_SOFTDEP("pre: aead2");
 MODULE_SOFTDEP("pre: ccm");
-#endif /* CONFIG_CIFS_SMB2 */
 module_init(init_cifs)
 module_exit(exit_cifs)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index bcc7d9a..221693f 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h

@@ -29,9 +29,7 @@
 #include <crypto/internal/hash.h>
 #include <linux/scatterlist.h>
 #include <uapi/linux/cifs/cifs_mount.h>
-#ifdef CONFIG_CIFS_SMB2
 #include "smb2pdu.h"
-#endif
 
 #define CIFS_MAGIC_NUMBER 0xFF534D42      /* the first four bytes of SMB PDUs */
 
@@ -367,6 +365,8 @@ struct smb_version_operations {
 	unsigned int (*calc_smb_size)(void *);
 	/* check for STATUS_PENDING and process it in a positive case */
 	bool (*is_status_pending)(char *, struct TCP_Server_Info *, int);
+	/* check for STATUS_NETWORK_SESSION_EXPIRED */
+	bool (*is_session_expired)(char *);
 	/* send oplock break response */
 	int (*oplock_response)(struct cifs_tcon *, struct cifs_fid *,
 			       struct cifsInodeInfo *);
@@ -610,12 +610,10 @@ struct TCP_Server_Info {
 	__u16 sec_mode;
 	bool sign; /* is signing enabled on this connection? */
 	bool session_estab; /* mark when very first sess is established */
-#ifdef CONFIG_CIFS_SMB2
 	int echo_credits;  /* echo reserved slots */
 	int oplock_credits;  /* oplock break reserved slots */
 	bool echoes:1; /* enable echoes */
 	__u8 client_guid[SMB2_CLIENT_GUID_SIZE]; /* Client GUID */
-#endif
 	u16 dialect; /* dialect index that server chose */
 	bool oplocks:1; /* enable oplocks */
 	unsigned int maxReq;	/* Clients should submit no more */
@@ -659,13 +657,11 @@ struct TCP_Server_Info {
 	atomic_t in_send; /* requests trying to send */
 	atomic_t num_waiters;   /* blocked waiting to get in sendrecv */
 #endif
-#ifdef CONFIG_CIFS_SMB2
 	unsigned int	max_read;
 	unsigned int	max_write;
 	__u8		preauth_hash[512];
 	struct delayed_work reconnect; /* reconnect workqueue job */
 	struct mutex reconnect_mutex; /* prevent simultaneous reconnects */
-#endif /* CONFIG_CIFS_SMB2 */
 	unsigned long echo_interval;
 };
 
@@ -847,13 +843,11 @@ struct cifs_ses {
 	bool sign;		/* is signing required? */
 	bool need_reconnect:1; /* connection reset, uid now invalid */
 	bool domainAuto:1;
-#ifdef CONFIG_CIFS_SMB2
 	__u16 session_flags;
 	__u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
 	__u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
 	__u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
 	__u8 preauth_hash[512];
-#endif /* CONFIG_CIFS_SMB2 */
 };
 
 static inline bool
@@ -905,12 +899,10 @@ struct cifs_tcon {
 			atomic_t num_acl_get;
 			atomic_t num_acl_set;
 		} cifs_stats;
-#ifdef CONFIG_CIFS_SMB2
 		struct {
 			atomic_t smb2_com_sent[NUMBER_OF_SMB2_COMMANDS];
 			atomic_t smb2_com_failed[NUMBER_OF_SMB2_COMMANDS];
 		} smb2_stats;
-#endif /* CONFIG_CIFS_SMB2 */
 	} stats;
 #ifdef CONFIG_CIFS_STATS2
 	unsigned long long time_writes;
@@ -946,7 +938,6 @@ struct cifs_tcon {
 	bool need_reopen_files:1; /* need to reopen tcon file handles */
 	bool use_resilient:1; /* use resilient instead of durable handles */
 	bool use_persistent:1; /* use persistent instead of durable handles */
-#ifdef CONFIG_CIFS_SMB2
 	bool print:1;		/* set if connection to printer share */
 	__le32 capabilities;
 	__u32 share_flags;
@@ -959,7 +950,6 @@ struct cifs_tcon {
 	__u32 max_chunks;
 	__u32 max_bytes_chunk;
 	__u32 max_bytes_copy;
-#endif /* CONFIG_CIFS_SMB2 */
 #ifdef CONFIG_CIFS_FSCACHE
 	u64 resource_id;		/* server resource id */
 	struct fscache_cookie *fscache;	/* cookie for share */
@@ -1062,12 +1052,10 @@ struct cifs_open_parms {
 
 struct cifs_fid {
 	__u16 netfid;
-#ifdef CONFIG_CIFS_SMB2
 	__u64 persistent_fid;	/* persist file id for smb2 */
 	__u64 volatile_fid;	/* volatile file id for smb2 */
 	__u8 lease_key[SMB2_LEASE_KEY_SIZE];	/* lease key for smb2 */
 	__u8 create_guid[16];
-#endif
 	struct cifs_pending_open *pending_open;
 	unsigned int epoch;
 	bool purge_cache;
@@ -1105,10 +1093,8 @@ struct cifsFileInfo {
 
 struct cifs_io_parms {
 	__u16 netfid;
-#ifdef CONFIG_CIFS_SMB2
 	__u64 persistent_fid;	/* persist file id for smb2 */
 	__u64 volatile_fid;	/* volatile file id for smb2 */
-#endif
 	__u32 pid;
 	__u64 offset;
 	unsigned int length;
@@ -1234,9 +1220,7 @@ struct cifsInodeInfo {
 	u64  server_eof;		/* current file size on server -- protected by i_lock */
 	u64  uniqueid;			/* server inode number */
 	u64  createtime;		/* creation time on server */
-#ifdef CONFIG_CIFS_SMB2
 	__u8 lease_key[SMB2_LEASE_KEY_SIZE];	/* lease key for this inode */
-#endif
 #ifdef CONFIG_CIFS_FSCACHE
 	struct fscache_cookie *fscache;
 #endif

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index fbb0d4c..72a53bd 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c

@@ -1460,6 +1460,13 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 		return length;
 	server->total_read += length;
 
+	if (server->ops->is_session_expired &&
+	    server->ops->is_session_expired(buf)) {
+		cifs_reconnect(server);
+		wake_up(&server->response_q);
+		return -1;
+	}
+
 	if (server->ops->is_status_pending &&
 	    server->ops->is_status_pending(buf, server, 0)) {
 		cifs_discard_remaining_data(server);

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 9365c0c..59647eb 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c

@@ -55,9 +55,7 @@
 #include "nterr.h"
 #include "rfc1002pdu.h"
 #include "fscache.h"
-#ifdef CONFIG_CIFS_SMB2
 #include "smb2proto.h"
-#endif
 
 #define CIFS_PORT 445
 #define RFC1001_PORT 139
@@ -341,9 +339,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
 		server->tcpStatus = CifsNeedReconnect;
 	spin_unlock(&GlobalMid_Lock);
 	server->maxBuf = 0;
-#ifdef CONFIG_CIFS_SMB2
 	server->max_read = 0;
-#endif
 
 	cifs_dbg(FYI, "Reconnecting tcp session\n");
 
@@ -812,6 +808,13 @@ cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 		cifs_dump_mem("Bad SMB: ", buf,
 			min_t(unsigned int, server->total_read, 48));
 
+	if (server->ops->is_session_expired &&
+	    server->ops->is_session_expired(buf)) {
+		cifs_reconnect(server);
+		wake_up(&server->response_q);
+		return -1;
+	}
+
 	if (server->ops->is_status_pending &&
 	    server->ops->is_status_pending(buf, server, length))
 		return -1;
@@ -1122,7 +1125,6 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
 		vol->ops = &smb1_operations;
 		vol->vals = &smb1_values;
 		break;
-#ifdef CONFIG_CIFS_SMB2
 	case Smb_20:
 		vol->ops = &smb20_operations;
 		vol->vals = &smb20_values;
@@ -1145,7 +1147,6 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
 		vol->vals = &smb311_values;
 		break;
 #endif /* SMB311 */
-#endif
 	default:
 		cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
 		return 1;
@@ -1271,9 +1272,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 
 	vol->actimeo = CIFS_DEF_ACTIMEO;
 
-	/* FIXME: add autonegotiation -- for now, SMB1 is default */
-	vol->ops = &smb1_operations;
-	vol->vals = &smb1_values;
+	/* FIXME: add autonegotiation for SMB3 or later rather than just SMB3 */
+	vol->ops = &smb30_operations; /* both secure and accepted widely */
+	vol->vals = &smb30_values;
 
 	vol->echo_interval = SMB_ECHO_INTERVAL_DEFAULT;
 
@@ -2170,7 +2171,6 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
 
 	cancel_delayed_work_sync(&server->echo);
 
-#ifdef CONFIG_CIFS_SMB2
 	if (from_reconnect)
 		/*
 		 * Avoid deadlock here: reconnect work calls
@@ -2181,7 +2181,6 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
 		cancel_delayed_work(&server->reconnect);
 	else
 		cancel_delayed_work_sync(&server->reconnect);
-#endif
 
 	spin_lock(&GlobalMid_Lock);
 	server->tcpStatus = CifsExiting;
@@ -2247,17 +2246,13 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 	INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
 	INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
 	INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);
-#ifdef CONFIG_CIFS_SMB2
 	INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server);
 	mutex_init(&tcp_ses->reconnect_mutex);
-#endif
 	memcpy(&tcp_ses->srcaddr, &volume_info->srcaddr,
 	       sizeof(tcp_ses->srcaddr));
 	memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr,
 		sizeof(tcp_ses->dstaddr));
-#ifdef CONFIG_CIFS_SMB2
 	generate_random_uuid(tcp_ses->client_guid);
-#endif
 	/*
 	 * at this point we are the only ones with the pointer
 	 * to the struct since the kernel thread not created yet
@@ -2655,10 +2650,8 @@ static int match_tcon(struct cifs_tcon *tcon, struct smb_vol *volume_info)
 		return 0;
 	if (tcon->seal != volume_info->seal)
 		return 0;
-#ifdef CONFIG_CIFS_SMB2
 	if (tcon->snapshot_time != volume_info->snapshot_time)
 		return 0;
-#endif /* CONFIG_CIFS_SMB2 */
 	return 1;
 }
 
@@ -2733,7 +2726,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 	}
 
 	if (volume_info->snapshot_time) {
-#ifdef CONFIG_CIFS_SMB2
 		if (ses->server->vals->protocol_id == 0) {
 			cifs_dbg(VFS,
 			     "Use SMB2 or later for snapshot mount option\n");
@@ -2741,11 +2733,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 			goto out_fail;
 		} else
 			tcon->snapshot_time = volume_info->snapshot_time;
-#else
-		cifs_dbg(VFS, "Snapshot mount option requires SMB2 support\n");
-		rc = -EOPNOTSUPP;
-		goto out_fail;
-#endif /* CONFIG_CIFS_SMB2 */
 	}
 
 	tcon->ses = ses;
@@ -2781,7 +2768,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 			     "SMB3 or later required for persistent handles\n");
 			rc = -EOPNOTSUPP;
 			goto out_fail;
-#ifdef CONFIG_CIFS_SMB2
 		} else if (ses->server->capabilities &
 			   SMB2_GLOBAL_CAP_PERSISTENT_HANDLES)
 			tcon->use_persistent = true;
@@ -2790,15 +2776,12 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 				"Persistent handles not supported on share\n");
 			rc = -EOPNOTSUPP;
 			goto out_fail;
-#endif /* CONFIG_CIFS_SMB2 */
 		}
-#ifdef CONFIG_CIFS_SMB2
 	} else if ((tcon->capabilities & SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY)
 	     && (ses->server->capabilities & SMB2_GLOBAL_CAP_PERSISTENT_HANDLES)
 	     && (volume_info->nopersistent == false)) {
 		cifs_dbg(FYI, "enabling persistent handles\n");
 		tcon->use_persistent = true;
-#endif /* CONFIG_CIFS_SMB2 */
 	} else if (volume_info->resilient) {
 		if (ses->server->vals->protocol_id == 0) {
 			cifs_dbg(VFS,
@@ -2815,7 +2798,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 				 "SMB3 or later required for encryption\n");
 			rc = -EOPNOTSUPP;
 			goto out_fail;
-#ifdef CONFIG_CIFS_SMB2
 		} else if (tcon->ses->server->capabilities &
 					SMB2_GLOBAL_CAP_ENCRYPTION)
 			tcon->seal = true;
@@ -2823,7 +2805,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 			cifs_dbg(VFS, "Encryption is not supported on share\n");
 			rc = -EOPNOTSUPP;
 			goto out_fail;
-#endif /* CONFIG_CIFS_SMB2 */
 		}
 	}
 
@@ -3738,14 +3719,12 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
 		goto mount_fail_check;
 	}
 
-#ifdef CONFIG_CIFS_SMB2
 	if ((volume_info->persistent == true) && ((ses->server->capabilities &
 		SMB2_GLOBAL_CAP_PERSISTENT_HANDLES) == 0)) {
 		cifs_dbg(VFS, "persistent handles not supported by server\n");
 		rc = -EOPNOTSUPP;
 		goto mount_fail_check;
 	}
-#endif /* CONFIG_CIFS_SMB2*/
 
 	/* search for existing tcon to this server share */
 	tcon = cifs_get_tcon(ses, volume_info);

diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 76fb091..54f32f9 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c

@@ -101,7 +101,6 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon,
 	fsinf->fs_attributes = le32_to_cpu(tcon->fsAttrInfo.Attributes);
 	fsinf->max_path_component =
 		le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength);
-#ifdef CONFIG_CIFS_SMB2
 	fsinf->vol_serial_number = tcon->vol_serial_number;
 	fsinf->vol_create_time = le64_to_cpu(tcon->vol_create_time);
 	fsinf->share_flags = tcon->share_flags;
@@ -110,7 +109,6 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon,
 	fsinf->optimal_sector_size = tcon->perf_sector_size;
 	fsinf->max_bytes_chunk = tcon->max_bytes_chunk;
 	fsinf->maximal_access = tcon->maximal_access;
-#endif /* SMB2 */
 	fsinf->cifs_posix_caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
 
 	if (copy_to_user(arg, fsinf, sizeof(struct smb_mnt_fs_info)))

diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index c4d996f..60b5a11 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c

@@ -29,9 +29,7 @@
 #include "cifs_debug.h"
 #include "cifs_fs_sb.h"
 #include "cifs_unicode.h"
-#ifdef CONFIG_CIFS_SMB2
 #include "smb2proto.h"
-#endif
 
 /*
  * M-F Symlink Functions - Begin
@@ -402,7 +400,6 @@ cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
 /*
  * SMB 2.1/SMB3 Protocol specific functions
  */
-#ifdef CONFIG_CIFS_SMB2
 int
 smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
 		      struct cifs_sb_info *cifs_sb, const unsigned char *path,
@@ -525,7 +522,6 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
 	kfree(utf16_path);
 	return rc;
 }
-#endif /* CONFIG_CIFS_SMB2 */
 
 /*
  * M-F Symlink Functions - End

diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 3b147dc..eea93ac 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c

@@ -30,9 +30,7 @@
 #include "smberr.h"
 #include "nterr.h"
 #include "cifs_unicode.h"
-#ifdef CONFIG_CIFS_SMB2
 #include "smb2pdu.h"
-#endif
 
 extern mempool_t *cifs_sm_req_poolp;
 extern mempool_t *cifs_req_poolp;
@@ -149,15 +147,12 @@ struct smb_hdr *
 cifs_buf_get(void)
 {
 	struct smb_hdr *ret_buf = NULL;
-	size_t buf_size = sizeof(struct smb_hdr);
-
-#ifdef CONFIG_CIFS_SMB2
 	/*
 	 * SMB2 header is bigger than CIFS one - no problems to clean some
 	 * more bytes for CIFS.
 	 */
-	buf_size = sizeof(struct smb2_hdr);
-#endif
+	size_t buf_size = sizeof(struct smb2_hdr);
+
 	/*
 	 * We could use negotiated size instead of max_msgsize -
 	 * but it may be more efficient to always alloc same size
@@ -620,9 +615,7 @@ void
 cifs_add_pending_open_locked(struct cifs_fid *fid, struct tcon_link *tlink,
 			     struct cifs_pending_open *open)
 {
-#ifdef CONFIG_CIFS_SMB2
 	memcpy(open->lease_key, fid->lease_key, SMB2_LEASE_KEY_SIZE);
-#endif
 	open->oplock = CIFS_OPLOCK_NO_CHANGE;
 	open->tlink = tlink;
 	fid->pending_open = open;

diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 3030a9d..7ca9808 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c

@@ -2475,8 +2475,8 @@ map_smb2_to_linux_error(char *buf, bool log_err)
 
 	/* on error mapping not found  - return EIO */
 
-	cifs_dbg(FYI, "Mapping SMB2 status code %d to POSIX err %d\n",
-		 smb2err, rc);
+	cifs_dbg(FYI, "Mapping SMB2 status code 0x%08x to POSIX err %d\n",
+		 __le32_to_cpu(smb2err), rc);
 
 	return rc;
 }

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index ccbb397..cfacf2c 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c

@@ -1036,6 +1036,18 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length)
 	return true;
 }
 
+/* True if the header in @buf has status STATUS_NETWORK_SESSION_EXPIRED. */
+static bool
+smb2_is_session_expired(char *buf)
+{
+	struct smb2_sync_hdr *hdr = get_sync_hdr(buf);
+	bool expired = (hdr->Status == STATUS_NETWORK_SESSION_EXPIRED);
+
+	if (expired)
+		cifs_dbg(FYI, "Session expired\n");
+	return expired;
+}
+
 static int
 smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
 		     struct cifsInodeInfo *cinode)
@@ -1370,6 +1382,63 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb,
 	return pntsd;
 }
 
+#ifdef CONFIG_CIFS_ACL
+/* Set security descriptor @pnntsd on @path: SMB2 open, set ACL, close. */
+static int
+set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
+		struct inode *inode, const char *path, int aclflag)
+{
+	u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+	unsigned int xid;
+	int rc;
+	struct cifs_tcon *tcon;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
+	struct cifs_fid fid;
+	struct cifs_open_parms oparms;
+	__le16 *utf16_path;
+
+	cifs_dbg(FYI, "set smb3 acl for path %s\n", path);
+	if (IS_ERR(tlink))
+		return PTR_ERR(tlink);
+
+	tcon = tlink_tcon(tlink);
+	xid = get_xid();
+
+	utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
+	if (!utf16_path) {
+		/* release tlink and xid exactly as the normal exit does */
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	oparms.tcon = tcon;
+	oparms.create_options =
+		backup_cred(cifs_sb) ? CREATE_OPEN_BACKUP_INTENT : 0;
+	/* owner/group changes need WRITE_OWNER, everything else WRITE_DAC */
+	oparms.desired_access =
+		(aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP) ?
+		WRITE_OWNER : WRITE_DAC;
+	oparms.disposition = FILE_OPEN;
+	oparms.path = path;
+	oparms.fid = &fid;
+	oparms.reconnect = false;
+
+	rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
+	kfree(utf16_path);
+	if (!rc) {
+		rc = SMB2_set_acl(xid, tcon, fid.persistent_fid,
+			    fid.volatile_fid, pnntsd, acllen, aclflag);
+		SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+	}
+
+out:
+	cifs_put_tlink(tlink);
+	free_xid(xid);
+	return rc;
+}
+#endif /* CIFS_ACL */
+
 /* Retrieve an ACL from the server */
 static struct cifs_ntsd *
 get_smb2_acl(struct cifs_sb_info *cifs_sb,
@@ -2160,6 +2229,13 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
 		return -ENOTSUPP;
 	}
 
+	if (server->ops->is_session_expired &&
+	    server->ops->is_session_expired(buf)) {
+		cifs_reconnect(server);
+		wake_up(&server->response_q);
+		return -1;
+	}
+
 	if (server->ops->is_status_pending &&
 			server->ops->is_status_pending(buf, server, 0))
 		return -1;
@@ -2477,6 +2553,7 @@ struct smb_version_operations smb20_operations = {
 	.close_dir = smb2_close_dir,
 	.calc_smb_size = smb2_calc_size,
 	.is_status_pending = smb2_is_status_pending,
+	.is_session_expired = smb2_is_session_expired,
 	.oplock_response = smb2_oplock_response,
 	.queryfs = smb2_queryfs,
 	.mand_lock = smb2_mand_lock,
@@ -2498,7 +2575,7 @@ struct smb_version_operations smb20_operations = {
 #ifdef CONFIG_CIFS_ACL
 	.get_acl = get_smb2_acl,
 	.get_acl_by_fid = get_smb2_acl_by_fid,
-/*	.set_acl = set_smb3_acl, */
+	.set_acl = set_smb2_acl,
 #endif /* CIFS_ACL */
 };
 
@@ -2565,6 +2642,7 @@ struct smb_version_operations smb21_operations = {
 	.close_dir = smb2_close_dir,
 	.calc_smb_size = smb2_calc_size,
 	.is_status_pending = smb2_is_status_pending,
+	.is_session_expired = smb2_is_session_expired,
 	.oplock_response = smb2_oplock_response,
 	.queryfs = smb2_queryfs,
 	.mand_lock = smb2_mand_lock,
@@ -2587,7 +2665,7 @@ struct smb_version_operations smb21_operations = {
 #ifdef CONFIG_CIFS_ACL
 	.get_acl = get_smb2_acl,
 	.get_acl_by_fid = get_smb2_acl_by_fid,
-/*	.set_acl = set_smb3_acl, */
+	.set_acl = set_smb2_acl,
 #endif /* CIFS_ACL */
 };
 
@@ -2655,6 +2733,7 @@ struct smb_version_operations smb30_operations = {
 	.close_dir = smb2_close_dir,
 	.calc_smb_size = smb2_calc_size,
 	.is_status_pending = smb2_is_status_pending,
+	.is_session_expired = smb2_is_session_expired,
 	.oplock_response = smb2_oplock_response,
 	.queryfs = smb2_queryfs,
 	.mand_lock = smb2_mand_lock,
@@ -2686,7 +2765,7 @@ struct smb_version_operations smb30_operations = {
 #ifdef CONFIG_CIFS_ACL
 	.get_acl = get_smb2_acl,
 	.get_acl_by_fid = get_smb2_acl_by_fid,
-/*	.set_acl = set_smb3_acl, */
+	.set_acl = set_smb2_acl,
 #endif /* CIFS_ACL */
 };
 
@@ -2755,6 +2834,7 @@ struct smb_version_operations smb311_operations = {
 	.close_dir = smb2_close_dir,
 	.calc_smb_size = smb2_calc_size,
 	.is_status_pending = smb2_is_status_pending,
+	.is_session_expired = smb2_is_session_expired,
 	.oplock_response = smb2_oplock_response,
 	.queryfs = smb2_queryfs,
 	.mand_lock = smb2_mand_lock,

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 4938e8b..5fb2fc2 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c

@@ -1167,15 +1167,12 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
 	int rc = 0;
 	int resp_buftype;
 	int unc_path_len;
-	struct TCP_Server_Info *server;
 	__le16 *unc_path = NULL;
 	int flags = 0;
 
 	cifs_dbg(FYI, "TCON\n");
 
-	if ((ses->server) && tree)
-		server = ses->server;
-	else
+	if (!(ses->server) || !tree)
 		return -EIO;
 
 	unc_path = kmalloc(MAX_SHARENAME_LENGTH * 2, GFP_KERNEL);
@@ -1294,15 +1291,12 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
 {
 	struct smb2_tree_disconnect_req *req; /* response is trivial */
 	int rc = 0;
-	struct TCP_Server_Info *server;
 	struct cifs_ses *ses = tcon->ses;
 	int flags = 0;
 
 	cifs_dbg(FYI, "Tree Disconnect\n");
 
-	if (ses && (ses->server))
-		server = ses->server;
-	else
+	if (!ses || !(ses->server))
 		return -EIO;
 
 	if ((tcon->need_reconnect) || (tcon->ses->need_reconnect))
@@ -1794,7 +1788,6 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 	struct smb2_ioctl_req *req;
 	struct smb2_ioctl_rsp *rsp;
 	struct smb2_sync_hdr *shdr;
-	struct TCP_Server_Info *server;
 	struct cifs_ses *ses;
 	struct kvec iov[2];
 	struct kvec rsp_iov;
@@ -1817,9 +1810,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 	else
 		return -EIO;
 
-	if (ses && (ses->server))
-		server = ses->server;
-	else
+	if (!ses || !(ses->server))
 		return -EIO;
 
 	rc = small_smb2_init(SMB2_IOCTL, tcon, (void **) &req);
@@ -1977,7 +1968,6 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
 {
 	struct smb2_close_req *req;
 	struct smb2_close_rsp *rsp;
-	struct TCP_Server_Info *server;
 	struct cifs_ses *ses = tcon->ses;
 	struct kvec iov[1];
 	struct kvec rsp_iov;
@@ -1987,9 +1977,7 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
 
 	cifs_dbg(FYI, "Close\n");
 
-	if (ses && (ses->server))
-		server = ses->server;
-	else
+	if (!ses || !(ses->server))
 		return -EIO;
 
 	rc = small_smb2_init(SMB2_CLOSE, tcon, (void **) &req);
@@ -2091,15 +2079,12 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
 	struct kvec rsp_iov;
 	int rc = 0;
 	int resp_buftype;
-	struct TCP_Server_Info *server;
 	struct cifs_ses *ses = tcon->ses;
 	int flags = 0;
 
 	cifs_dbg(FYI, "Query Info\n");
 
-	if (ses && (ses->server))
-		server = ses->server;
-	else
+	if (!ses || !(ses->server))
 		return -EIO;
 
 	rc = small_smb2_init(SMB2_QUERY_INFO, tcon, (void **) &req);
@@ -2311,7 +2296,6 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 	   u64 volatile_fid)
 {
 	struct smb2_flush_req *req;
-	struct TCP_Server_Info *server;
 	struct cifs_ses *ses = tcon->ses;
 	struct kvec iov[1];
 	struct kvec rsp_iov;
@@ -2321,9 +2305,7 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 
 	cifs_dbg(FYI, "Flush\n");
 
-	if (ses && (ses->server))
-		server = ses->server;
-	else
+	if (!ses || !(ses->server))
 		return -EIO;
 
 	rc = small_smb2_init(SMB2_FLUSH, tcon, (void **) &req);
@@ -3000,8 +2982,9 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
 
 static int
 send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
-	       u64 persistent_fid, u64 volatile_fid, u32 pid, int info_class,
-	       unsigned int num, void **data, unsigned int *size)
+	       u64 persistent_fid, u64 volatile_fid, u32 pid, u8 info_class,
+	       u8 info_type, u32 additional_info, unsigned int num,
+		void **data, unsigned int *size)
 {
 	struct smb2_set_info_req *req;
 	struct smb2_set_info_rsp *rsp = NULL;
@@ -3010,13 +2993,10 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
 	int rc = 0;
 	int resp_buftype;
 	unsigned int i;
-	struct TCP_Server_Info *server;
 	struct cifs_ses *ses = tcon->ses;
 	int flags = 0;
 
-	if (ses && (ses->server))
-		server = ses->server;
-	else
+	if (!ses || !(ses->server))
 		return -EIO;
 
 	if (!num)
@@ -3037,10 +3017,11 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
 
 	req->hdr.sync_hdr.ProcessId = cpu_to_le32(pid);
 
-	req->InfoType = SMB2_O_INFO_FILE;
+	req->InfoType = info_type;
 	req->FileInfoClass = info_class;
 	req->PersistentFileId = persistent_fid;
 	req->VolatileFileId = volatile_fid;
+	req->AdditionalInformation = cpu_to_le32(additional_info);
 
 	/* 4 for RFC1001 length and 1 for Buffer */
 	req->BufferOffset =
@@ -3100,8 +3081,8 @@ SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
 	size[1] = len + 2 /* null */;
 
 	rc = send_set_info(xid, tcon, persistent_fid, volatile_fid,
-			   current->tgid, FILE_RENAME_INFORMATION, 2, data,
-			   size);
+		current->tgid, FILE_RENAME_INFORMATION, SMB2_O_INFO_FILE,
+		0, 2, data, size);
 	kfree(data);
 	return rc;
 }
@@ -3118,8 +3099,8 @@ SMB2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
 	size = 1; /* sizeof __u8 */
 
 	return send_set_info(xid, tcon, persistent_fid, volatile_fid,
-			current->tgid, FILE_DISPOSITION_INFORMATION, 1, &data,
-			&size);
+		current->tgid, FILE_DISPOSITION_INFORMATION, SMB2_O_INFO_FILE,
+		0, 1, &data, &size);
 }
 
 int
@@ -3148,7 +3129,8 @@ SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
 	size[1] = len + 2 /* null */;
 
 	rc = send_set_info(xid, tcon, persistent_fid, volatile_fid,
-			   current->tgid, FILE_LINK_INFORMATION, 2, data, size);
+			current->tgid, FILE_LINK_INFORMATION, SMB2_O_INFO_FILE,
+			0, 2, data, size);
 	kfree(data);
 	return rc;
 }
@@ -3168,10 +3150,12 @@ SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 
 	if (is_falloc)
 		return send_set_info(xid, tcon, persistent_fid, volatile_fid,
-			pid, FILE_ALLOCATION_INFORMATION, 1, &data, &size);
+			pid, FILE_ALLOCATION_INFORMATION, SMB2_O_INFO_FILE,
+			0, 1, &data, &size);
 	else
 		return send_set_info(xid, tcon, persistent_fid, volatile_fid,
-			pid, FILE_END_OF_FILE_INFORMATION, 1, &data, &size);
+			pid, FILE_END_OF_FILE_INFORMATION, SMB2_O_INFO_FILE,
+			0, 1, &data, &size);
 }
 
 int
@@ -3181,8 +3165,27 @@ SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
 	unsigned int size;
 	size = sizeof(FILE_BASIC_INFO);
 	return send_set_info(xid, tcon, persistent_fid, volatile_fid,
-			     current->tgid, FILE_BASIC_INFORMATION, 1,
-			     (void **)&buf, &size);
+		current->tgid, FILE_BASIC_INFORMATION, SMB2_O_INFO_FILE,
+		0, 1, (void **)&buf, &size);
+}
+
+/*
+ * Send a SET_INFO request of type SMB2_O_INFO_SECURITY for the open file
+ * identified by persistent_fid/volatile_fid.  @pnntsd is sent as the single
+ * data buffer of @pacllen bytes and @aclflag is passed through as the
+ * request's AdditionalInformation.  Returns the result of send_set_info().
+ */
+int
+SMB2_set_acl(const unsigned int xid, struct cifs_tcon *tcon,
+		u64 persistent_fid, u64 volatile_fid,
+		struct cifs_ntsd *pnntsd, int pacllen, int aclflag)
+{
+	/* send_set_info() takes unsigned int *; don't alias the int parameter */
+	unsigned int size = pacllen;
+
+	return send_set_info(xid, tcon, persistent_fid, volatile_fid,
+			current->tgid, 0, SMB2_O_INFO_SECURITY, aclflag,
+			1, (void **)&pnntsd, &size);
 }
 
 int

diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 3595cd7..1cadaf9 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h

@@ -166,6 +166,9 @@ extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon,
 extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
 			 u64 persistent_fid, u64 volatile_fid,
 			 FILE_BASIC_INFO *buf);
+extern int SMB2_set_acl(const unsigned int xid, struct cifs_tcon *tcon,
+			u64 persistent_fid, u64 volatile_fid,
+			struct cifs_ntsd *pnntsd, int pacllen, int aclflag);
 extern int SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
 				u64 persistent_fid, u64 volatile_fid);
 extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,

diff --git a/fs/dcache.c b/fs/dcache.c
index 7ece68d..6c30be6 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c

@@ -1160,11 +1160,12 @@ void shrink_dcache_sb(struct super_block *sb)
 		LIST_HEAD(dispose);
 
 		freed = list_lru_walk(&sb->s_dentry_lru,
-			dentry_lru_isolate_shrink, &dispose, UINT_MAX);
+			dentry_lru_isolate_shrink, &dispose, 1024);
 
 		this_cpu_sub(nr_dentry_unused, freed);
 		shrink_dentry_list(&dispose);
-	} while (freed > 0);
+		cond_resched();
+	} while (list_lru_count(&sb->s_dentry_lru) > 0);
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
 

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index b1c8e23..e767e43 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c

@@ -960,10 +960,14 @@ static void ep_show_fdinfo(struct seq_file *m, struct file *f)
 	mutex_lock(&ep->mtx);
 	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
 		struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
+		struct inode *inode = file_inode(epi->ffd.file);
 
-		seq_printf(m, "tfd: %8d events: %8x data: %16llx\n",
+		seq_printf(m, "tfd: %8d events: %8x data: %16llx "
+			   " pos:%lli ino:%lx sdev:%x\n",
 			   epi->ffd.fd, epi->event.events,
-			   (long long)epi->event.data);
+			   (long long)epi->event.data,
+			   (long long)epi->ffd.file->f_pos,
+			   inode->i_ino, inode->i_sb->s_dev);
 		if (seq_has_overflowed(m))
 			break;
 	}
@@ -1073,6 +1077,50 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
 	return epir;
 }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+/* Return the (toff + 1)-th item of ep->rbr whose ffd.fd == tfd, else NULL. */
+static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff)
+{
+	struct rb_node *node = rb_first(&ep->rbr);
+
+	while (node) {
+		struct epitem *item = rb_entry(node, struct epitem, rbn);
+
+		/* Skip toff matching items; the match after those is returned. */
+		if (item->ffd.fd == tfd && toff-- == 0)
+			return item;
+		cond_resched();
+		node = rb_next(node);
+	}
+
+	/* Ran out of items before skipping toff matches. */
+	return NULL;
+}
+
+/*
+ * Return the struct file pointer stored in the epitem selected by tfd/toff,
+ * without taking a reference on it.  Yields ERR_PTR(-EINVAL) if @file is not
+ * an epoll file and ERR_PTR(-ENOENT) if no matching item is found.
+ */
+struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
+				     unsigned long toff)
+{
+	struct file *target = ERR_PTR(-ENOENT);
+	struct eventpoll *ep;
+	struct epitem *item;
+
+	if (!is_file_epoll(file))
+		return ERR_PTR(-EINVAL);
+	ep = file->private_data;
+	mutex_lock(&ep->mtx);
+	item = ep_find_tfd(ep, tfd, toff);
+	if (item)
+		target = item->ffd.file;
+	mutex_unlock(&ep->mtx);
+	return target;
+}
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+
 /*
  * This is the callback that is passed to the wait queue wakeup
  * mechanism. It is called by the stored file descriptors when they
@@ -1748,6 +1796,16 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 			 * to TASK_INTERRUPTIBLE before doing the checks.
 			 */
 			set_current_state(TASK_INTERRUPTIBLE);
+			/*
+			 * Always short-circuit for fatal signals to allow
+			 * threads to make a timely exit without the chance of
+			 * finding more events available and fetching
+			 * repeatedly.
+			 */
+			if (fatal_signal_pending(current)) {
+				res = -EINTR;
+				break;
+			}
 			if (ep_events_available(ep) || timed_out)
 				break;
 			if (signal_pending(current)) {

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 2dcbd56..30163d0 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c

@@ -659,6 +659,7 @@ static int ext2_get_blocks(struct inode *inode,
 				 */
 				err = -EAGAIN;
 				count = 0;
+				partial = chain + depth - 1;
 				break;
 			}
 			blk = le32_to_cpu(*(chain[depth-1].p + count));

diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index ca949ea..a0dc559 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile

@@ -2,7 +2,7 @@
 
 f2fs-y		:= dir.o file.o inode.o namei.o hash.o super.o inline.o
 f2fs-y		+= checkpoint.o gc.o data.o node.o segment.o recovery.o
-f2fs-y		+= shrinker.o extent_cache.o
+f2fs-y		+= shrinker.o extent_cache.o sysfs.o
 f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
 f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
 f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o

diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 8f48769..a140c5e 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c

@@ -233,7 +233,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
 		value = f2fs_acl_to_disk(F2FS_I_SB(inode), acl, &size);
 		if (IS_ERR(value)) {
 			clear_inode_flag(inode, FI_ACL_MODE);
-			return (int)PTR_ERR(value);
+			return PTR_ERR(value);
 		}
 	}
 

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ea9c317b..56bbf59 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c

@@ -31,7 +31,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
 	set_ckpt_flags(sbi, CP_ERROR_FLAG);
 	sbi->sb->s_flags |= MS_RDONLY;
 	if (!end_io)
-		f2fs_flush_merged_bios(sbi);
+		f2fs_flush_merged_writes(sbi);
 }
 
 /*
@@ -162,6 +162,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
 		.op = REQ_OP_READ,
 		.op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
 		.encrypted_page = NULL,
+		.in_list = false,
 	};
 	struct blk_plug plug;
 
@@ -207,12 +208,10 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
 		}
 
 		fio.page = page;
-		fio.old_blkaddr = fio.new_blkaddr;
-		f2fs_submit_page_mbio(&fio);
+		f2fs_submit_page_bio(&fio);
 		f2fs_put_page(page, 0);
 	}
 out:
-	f2fs_submit_merged_bio(sbi, META, READ);
 	blk_finish_plug(&plug);
 	return blkno - start;
 }
@@ -249,13 +248,13 @@ static int f2fs_write_meta_page(struct page *page,
 	dec_page_count(sbi, F2FS_DIRTY_META);
 
 	if (wbc->for_reclaim)
-		f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
-						0, page->index, META, WRITE);
+		f2fs_submit_merged_write_cond(sbi, page->mapping->host,
+						0, page->index, META);
 
 	unlock_page(page);
 
 	if (unlikely(f2fs_cp_error(sbi)))
-		f2fs_submit_merged_bio(sbi, META, WRITE);
+		f2fs_submit_merged_write(sbi, META);
 
 	return 0;
 
@@ -270,6 +269,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
 	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
 	long diff, written;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+		goto skip_write;
+
 	/* collect a number of dirty meta pages and write together */
 	if (wbc->for_kupdate ||
 		get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
@@ -358,7 +360,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
 	}
 stop:
 	if (nwritten)
-		f2fs_submit_merged_bio(sbi, type, WRITE);
+		f2fs_submit_merged_write(sbi, type);
 
 	blk_finish_plug(&plug);
 
@@ -906,7 +908,7 @@ int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
 		 * We should submit bio, since it exists several
 		 * wribacking dentry pages in the freeing inode.
 		 */
-		f2fs_submit_merged_bio(sbi, DATA, WRITE);
+		f2fs_submit_merged_write(sbi, DATA);
 		cond_resched();
 	}
 	goto retry;
@@ -1051,8 +1053,9 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
 	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+	unsigned long flags;
 
-	spin_lock(&sbi->cp_lock);
+	spin_lock_irqsave(&sbi->cp_lock, flags);
 
 	if ((cpc->reason & CP_UMOUNT) &&
 			le32_to_cpu(ckpt->cp_pack_total_block_count) >
@@ -1083,14 +1086,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	/* set this flag to activate crc|cp_ver for recovery */
 	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
 
-	spin_unlock(&sbi->cp_lock);
+	spin_unlock_irqrestore(&sbi->cp_lock, flags);
 }
 
 static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
-	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
+	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num, flags;
 	block_t start_blk;
 	unsigned int data_sum_blocks, orphan_blocks;
 	__u32 crc32 = 0;
@@ -1132,12 +1135,12 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	/* 2 cp  + n data seg summary + orphan inode blocks */
 	data_sum_blocks = npages_for_summary_flush(sbi, false);
-	spin_lock(&sbi->cp_lock);
+	spin_lock_irqsave(&sbi->cp_lock, flags);
 	if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
 		__set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
 	else
 		__clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
-	spin_unlock(&sbi->cp_lock);
+	spin_unlock_irqrestore(&sbi->cp_lock, flags);
 
 	orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
 	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
@@ -1295,7 +1298,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
 
-	f2fs_flush_merged_bios(sbi);
+	f2fs_flush_merged_writes(sbi);
 
 	/* this is the case of multiple fstrims without any changes */
 	if (cpc->reason & CP_DISCARD) {

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 36fe820..87c1f41 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c

@@ -282,29 +282,32 @@ static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
 				nid_t ino, pgoff_t idx, enum page_type type)
 {
 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
-	struct f2fs_bio_info *io = &sbi->write_io[btype];
-	bool ret;
+	enum temp_type temp;
+	struct f2fs_bio_info *io;
+	bool ret = false;
 
-	down_read(&io->io_rwsem);
-	ret = __has_merged_page(io, inode, ino, idx);
-	up_read(&io->io_rwsem);
+	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
+		io = sbi->write_io[btype] + temp;
+
+		down_read(&io->io_rwsem);
+		ret = __has_merged_page(io, inode, ino, idx);
+		up_read(&io->io_rwsem);
+
+		/* TODO: use HOT temp only for meta pages now. */
+		if (ret || btype == META)
+			break;
+	}
 	return ret;
 }
 
-static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
-				struct inode *inode, nid_t ino, pgoff_t idx,
-				enum page_type type, int rw)
+static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
+				enum page_type type, enum temp_type temp)
 {
 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
-	struct f2fs_bio_info *io;
-
-	io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
+	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
 
 	down_write(&io->io_rwsem);
 
-	if (!__has_merged_page(io, inode, ino, idx))
-		goto out;
-
 	/* change META to META_FLUSH in the checkpoint procedure */
 	if (type >= META_FLUSH) {
 		io->fio.type = META_FLUSH;
@@ -314,29 +317,45 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
 			io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
 	}
 	__submit_merged_bio(io);
-out:
 	up_write(&io->io_rwsem);
 }
 
-void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
-									int rw)
-{
-	__f2fs_submit_merged_bio(sbi, NULL, 0, 0, type, rw);
-}
-
-void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
+static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
 				struct inode *inode, nid_t ino, pgoff_t idx,
-				enum page_type type, int rw)
+				enum page_type type, bool force)
 {
-	if (has_merged_page(sbi, inode, ino, idx, type))
-		__f2fs_submit_merged_bio(sbi, inode, ino, idx, type, rw);
+	enum temp_type temp;
+
+	if (!force && !has_merged_page(sbi, inode, ino, idx, type))
+		return;
+
+	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
+
+		__f2fs_submit_merged_write(sbi, type, temp);
+
+		/* TODO: use HOT temp only for meta pages now. */
+		if (type >= META)
+			break;
+	}
 }
 
-void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi)
+void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
 {
-	f2fs_submit_merged_bio(sbi, DATA, WRITE);
-	f2fs_submit_merged_bio(sbi, NODE, WRITE);
-	f2fs_submit_merged_bio(sbi, META, WRITE);
+	__submit_merged_write_cond(sbi, NULL, 0, 0, type, true);
+}
+
+void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
+				struct inode *inode, nid_t ino, pgoff_t idx,
+				enum page_type type)
+{
+	__submit_merged_write_cond(sbi, inode, ino, idx, type, false);
+}
+
+void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
+{
+	f2fs_submit_merged_write(sbi, DATA);
+	f2fs_submit_merged_write(sbi, NODE);
+	f2fs_submit_merged_write(sbi, META);
 }
 
 /*
@@ -368,16 +387,29 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
 	return 0;
 }
 
-int f2fs_submit_page_mbio(struct f2fs_io_info *fio)
+int f2fs_submit_page_write(struct f2fs_io_info *fio)
 {
 	struct f2fs_sb_info *sbi = fio->sbi;
 	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
-	struct f2fs_bio_info *io;
-	bool is_read = is_read_io(fio->op);
+	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
 	struct page *bio_page;
 	int err = 0;
 
-	io = is_read ? &sbi->read_io : &sbi->write_io[btype];
+	f2fs_bug_on(sbi, is_read_io(fio->op));
+
+	down_write(&io->io_rwsem);
+next:
+	if (fio->in_list) {
+		spin_lock(&io->io_lock);
+		if (list_empty(&io->io_list)) {
+			spin_unlock(&io->io_lock);
+			goto out_fail;
+		}
+		fio = list_first_entry(&io->io_list,
+						struct f2fs_io_info, list);
+		list_del(&fio->list);
+		spin_unlock(&io->io_lock);
+	}
 
 	if (fio->old_blkaddr != NEW_ADDR)
 		verify_block_addr(sbi, fio->old_blkaddr);
@@ -388,10 +420,7 @@ int f2fs_submit_page_mbio(struct f2fs_io_info *fio)
 	/* set submitted = 1 as a return value */
 	fio->submitted = 1;
 
-	if (!is_read)
-		inc_page_count(sbi, WB_DATA_TYPE(bio_page));
-
-	down_write(&io->io_rwsem);
+	inc_page_count(sbi, WB_DATA_TYPE(bio_page));
 
 	if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
 	    (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
@@ -402,26 +431,28 @@ int f2fs_submit_page_mbio(struct f2fs_io_info *fio)
 		if ((fio->type == DATA || fio->type == NODE) &&
 				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
 			err = -EAGAIN;
-			if (!is_read)
-				dec_page_count(sbi, WB_DATA_TYPE(bio_page));
+			dec_page_count(sbi, WB_DATA_TYPE(bio_page));
 			goto out_fail;
 		}
 		io->bio = __bio_alloc(sbi, fio->new_blkaddr,
-						BIO_MAX_PAGES, is_read);
+						BIO_MAX_PAGES, false);
 		io->fio = *fio;
 	}
 
-	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) <
-							PAGE_SIZE) {
+	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
 		__submit_merged_bio(io);
 		goto alloc_new;
 	}
 
 	io->last_block_in_bio = fio->new_blkaddr;
 	f2fs_trace_ios(fio, 0);
+
+	trace_f2fs_submit_page_write(fio->page, fio);
+
+	if (fio->in_list)
+		goto next;
 out_fail:
 	up_write(&io->io_rwsem);
-	trace_f2fs_submit_page_mbio(fio->page, fio);
 	return err;
 }
 
@@ -460,14 +491,15 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
 int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
+	int err;
 
 	if (!count)
 		return 0;
 
 	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
 		return -EPERM;
-	if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count)))
-		return -ENOSPC;
+	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
+		return err;
 
 	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
 						dn->ofs_in_node, count);
@@ -718,6 +750,7 @@ static int __allocate_data_block(struct dnode_of_data *dn)
 	struct node_info ni;
 	pgoff_t fofs;
 	blkcnt_t count = 1;
+	int err;
 
 	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
 		return -EPERM;
@@ -726,15 +759,15 @@ static int __allocate_data_block(struct dnode_of_data *dn)
 	if (dn->data_blkaddr == NEW_ADDR)
 		goto alloc;
 
-	if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count)))
-		return -ENOSPC;
+	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
+		return err;
 
 alloc:
 	get_node_info(sbi, dn->nid, &ni);
 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
 
 	allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
-						&sum, CURSEG_WARM_DATA);
+					&sum, CURSEG_WARM_DATA, NULL, false);
 	set_data_blkaddr(dn);
 
 	/* update i_size */
@@ -1321,7 +1354,7 @@ static int encrypt_one_page(struct f2fs_io_info *fio)
 
 	/* flush pending IOs and wait for a while in the ENOMEM case */
 	if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
-		f2fs_flush_merged_bios(fio->sbi);
+		f2fs_flush_merged_writes(fio->sbi);
 		congestion_wait(BLK_RW_ASYNC, HZ/50);
 		gfp_flags |= __GFP_NOFAIL;
 		goto retry_encrypt;
@@ -1368,13 +1401,14 @@ int do_write_data_page(struct f2fs_io_info *fio)
 
 		if (valid_ipu_blkaddr(fio)) {
 			ipu_force = true;
-			fio->need_lock = false;
+			fio->need_lock = LOCK_DONE;
 			goto got_it;
 		}
 	}
 
-	if (fio->need_lock)
-		f2fs_lock_op(fio->sbi);
+	/* Use trylock to avoid a deadlock between page->lock and f2fs_lock_op */
+	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
+		return -EAGAIN;
 
 	err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
 	if (err)
@@ -1388,19 +1422,18 @@ int do_write_data_page(struct f2fs_io_info *fio)
 		goto out_writepage;
 	}
 got_it:
-	err = encrypt_one_page(fio);
-	if (err)
-		goto out_writepage;
-
-	set_page_writeback(page);
-
 	/*
 	 * If current allocation needs SSR,
 	 * it had better in-place writes for updated data.
 	 */
 	if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) {
+		err = encrypt_one_page(fio);
+		if (err)
+			goto out_writepage;
+
+		set_page_writeback(page);
 		f2fs_put_dnode(&dn);
-		if (fio->need_lock)
+		if (fio->need_lock == LOCK_REQ)
 			f2fs_unlock_op(fio->sbi);
 		err = rewrite_data_page(fio);
 		trace_f2fs_do_write_data_page(fio->page, IPU);
@@ -1408,6 +1441,20 @@ int do_write_data_page(struct f2fs_io_info *fio)
 		return err;
 	}
 
+	if (fio->need_lock == LOCK_RETRY) {
+		if (!f2fs_trylock_op(fio->sbi)) {
+			err = -EAGAIN;
+			goto out_writepage;
+		}
+		fio->need_lock = LOCK_REQ;
+	}
+
+	err = encrypt_one_page(fio);
+	if (err)
+		goto out_writepage;
+
+	set_page_writeback(page);
+
 	/* LFS mode write path */
 	write_data_page(&dn, fio);
 	trace_f2fs_do_write_data_page(page, OPU);
@@ -1417,7 +1464,7 @@ int do_write_data_page(struct f2fs_io_info *fio)
 out_writepage:
 	f2fs_put_dnode(&dn);
 out:
-	if (fio->need_lock)
+	if (fio->need_lock == LOCK_REQ)
 		f2fs_unlock_op(fio->sbi);
 	return err;
 }
@@ -1443,11 +1490,14 @@ static int __write_data_page(struct page *page, bool *submitted,
 		.page = page,
 		.encrypted_page = NULL,
 		.submitted = false,
-		.need_lock = true,
+		.need_lock = LOCK_RETRY,
 	};
 
 	trace_f2fs_writepage(page, DATA);
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+		goto redirty_out;
+
 	if (page->index < end_index)
 		goto write;
 
@@ -1461,8 +1511,6 @@ static int __write_data_page(struct page *page, bool *submitted,
 
 	zero_user_segment(page, offset, PAGE_SIZE);
 write:
-	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
-		goto redirty_out;
 	if (f2fs_is_drop_cache(inode))
 		goto out;
 	/* we should not write 0'th page having journal header */
@@ -1479,7 +1527,7 @@ static int __write_data_page(struct page *page, bool *submitted,
 
 	/* Dentry blocks are controlled by checkpoint */
 	if (S_ISDIR(inode->i_mode)) {
-		fio.need_lock = false;
+		fio.need_lock = LOCK_DONE;
 		err = do_write_data_page(&fio);
 		goto done;
 	}
@@ -1498,8 +1546,13 @@ static int __write_data_page(struct page *page, bool *submitted,
 			goto out;
 	}
 
-	if (err == -EAGAIN)
+	if (err == -EAGAIN) {
 		err = do_write_data_page(&fio);
+		if (err == -EAGAIN) {
+			fio.need_lock = LOCK_REQ;
+			err = do_write_data_page(&fio);
+		}
+	}
 	if (F2FS_I(inode)->last_disk_size < psize)
 		F2FS_I(inode)->last_disk_size = psize;
 
@@ -1513,8 +1566,7 @@ static int __write_data_page(struct page *page, bool *submitted,
 		ClearPageUptodate(page);
 
 	if (wbc->for_reclaim) {
-		f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
-						DATA, WRITE);
+		f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA);
 		clear_inode_flag(inode, FI_HOT_DATA);
 		remove_dirty_inode(inode);
 		submitted = NULL;
@@ -1525,7 +1577,7 @@ static int __write_data_page(struct page *page, bool *submitted,
 		f2fs_balance_fs(sbi, need_balance_fs);
 
 	if (unlikely(f2fs_cp_error(sbi))) {
-		f2fs_submit_merged_bio(sbi, DATA, WRITE);
+		f2fs_submit_merged_write(sbi, DATA);
 		submitted = NULL;
 	}
 
@@ -1618,7 +1670,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
 			}
 
 			done_index = page->index;
-
+retry_write:
 			lock_page(page);
 
 			if (unlikely(page->mapping != mapping)) {
@@ -1654,6 +1706,15 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
 					unlock_page(page);
 					ret = 0;
 					continue;
+				} else if (ret == -EAGAIN) {
+					ret = 0;
+					if (wbc->sync_mode == WB_SYNC_ALL) {
+						cond_resched();
+						congestion_wait(BLK_RW_ASYNC,
+									HZ/50);
+						goto retry_write;
+					}
+					continue;
 				}
 				done_index = page->index + 1;
 				done = 1;
@@ -1684,8 +1745,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
 		mapping->writeback_index = done_index;
 
 	if (last_idx != ULONG_MAX)
-		f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host,
-						0, last_idx, DATA, WRITE);
+		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
+						0, last_idx, DATA);
 
 	return ret;
 }
@@ -1706,6 +1767,10 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
 		return 0;
 
+	/* during POR, we don't need to trigger writepage at all. */
+	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+		goto skip_write;
+
 	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
 			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
 			available_free_memory(sbi, DIRTY_DENTS))
@@ -1715,10 +1780,6 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 	if (is_inode_flag_set(inode, FI_DO_DEFRAG))
 		goto skip_write;
 
-	/* during POR, we don't need to trigger writepage at all. */
-	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
-		goto skip_write;
-
 	trace_f2fs_writepages(mapping->host, wbc, DATA);
 
 	/* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
@@ -1753,8 +1814,10 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
 	loff_t i_size = i_size_read(inode);
 
 	if (to > i_size) {
+		down_write(&F2FS_I(inode)->i_mmap_sem);
 		truncate_pagecache(inode, i_size);
 		truncate_blocks(inode, i_size, true);
+		up_write(&F2FS_I(inode)->i_mmap_sem);
 	}
 }
 
@@ -2152,8 +2215,12 @@ int f2fs_migrate_page(struct address_space *mapping,
 	BUG_ON(PageWriteback(page));
 
 	/* migrating an atomic written page is safe with the inmem_lock hold */
-	if (atomic_written && !mutex_trylock(&fi->inmem_lock))
-		return -EAGAIN;
+	if (atomic_written) {
+		if (mode != MIGRATE_SYNC)
+			return -EBUSY;
+		if (!mutex_trylock(&fi->inmem_lock))
+			return -EAGAIN;
+	}
 
 	/*
 	 * A reference is expected if PagePrivate set when move mapping,

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 94756f5..37f9c7f 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c

@@ -415,7 +415,8 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
 	 * We lost i_pino from now on.
 	 */
 	if (is_inode_flag_set(inode, FI_INC_LINK)) {
-		file_lost_pino(inode);
+		if (!S_ISDIR(inode->i_mode))
+			file_lost_pino(inode);
 		/*
 		 * If link the tmpfile to alias through linkat path,
 		 * we should remove this inode from orphan list.

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 2f98d70..ff2352a 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c

@@ -320,7 +320,7 @@ static void __drop_largest_extent(struct inode *inode,
 }
 
 /* return true, if inode page is changed */
-bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
+static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct extent_tree *et;
@@ -358,6 +358,16 @@ bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
 	return false;
 }
 
+/* Init the extent tree; flag the inode FI_NO_EXTENT if none got attached. */
+bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
+{
+	bool changed = __f2fs_init_extent_tree(inode, i_ext);
+
+	if (F2FS_I(inode)->extent_tree == NULL)
+		set_inode_flag(inode, FI_NO_EXTENT);
+	return changed;
+}
+
 static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
 							struct extent_info *ei)
 {

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index fd2e651..94a88b2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h

@@ -22,6 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/quotaops.h>
 #ifdef CONFIG_F2FS_FS_ENCRYPTION
 #include <linux/fscrypt_supp.h>
 #else
@@ -88,6 +89,8 @@ extern char *fault_name[FAULT_MAX];
 #define F2FS_MOUNT_FAULT_INJECTION	0x00010000
 #define F2FS_MOUNT_ADAPTIVE		0x00020000
 #define F2FS_MOUNT_LFS			0x00040000
+#define F2FS_MOUNT_USRQUOTA		0x00080000
+#define F2FS_MOUNT_GRPQUOTA		0x00100000
 
 #define clear_opt(sbi, option)	((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option)
 #define set_opt(sbi, option)	((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -303,6 +306,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
 						struct f2fs_move_range)
 #define F2FS_IOC_FLUSH_DEVICE		_IOW(F2FS_IOCTL_MAGIC, 10,	\
 						struct f2fs_flush_device)
+#define F2FS_IOC_GARBAGE_COLLECT_RANGE	_IOW(F2FS_IOCTL_MAGIC, 11,	\
+						struct f2fs_gc_range)
 
 #define F2FS_IOC_SET_ENCRYPTION_POLICY	FS_IOC_SET_ENCRYPTION_POLICY
 #define F2FS_IOC_GET_ENCRYPTION_POLICY	FS_IOC_GET_ENCRYPTION_POLICY
@@ -327,6 +332,12 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
 #define F2FS_IOC32_GETVERSION		FS_IOC32_GETVERSION
 #endif
 
+struct f2fs_gc_range {	/* argument of F2FS_IOC_GARBAGE_COLLECT_RANGE */
+	u32 sync;	/* NOTE(review): u32 before u64; padding may differ per ABI */
+	u64 start;	/* presumably start of the range to collect; confirm in ioctl */
+	u64 len;	/* presumably length of that range; confirm in ioctl */
+};
+
 struct f2fs_defragment {
 	u64 start;
 	u64 len;
@@ -513,12 +524,19 @@ struct f2fs_inode_info {
 	nid_t i_xattr_nid;		/* node id that contains xattrs */
 	loff_t	last_disk_size;		/* lastly written file size */
 
+#ifdef CONFIG_QUOTA
+	struct dquot *i_dquot[MAXQUOTAS];
+
+	/* quota space reservation, managed internally by quota code */
+	qsize_t i_reserved_quota;
+#endif
 	struct list_head dirty_list;	/* dirty list for dirs and files */
 	struct list_head gdirty_list;	/* linked in global dirty list */
 	struct list_head inmem_pages;	/* inmemory pages managed by f2fs */
 	struct mutex inmem_lock;	/* lock for inmemory pages */
 	struct extent_tree *extent_tree;	/* cached extent_tree entry */
 	struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */
+	struct rw_semaphore i_mmap_sem;
 };
 
 static inline void get_extent_info(struct extent_info *ext,
@@ -792,17 +810,33 @@ enum page_type {
 	OPU,
 };
 
+enum temp_type {	/* picks the write bio slot: sbi->write_io[type] + temp */
+	HOT = 0,	/* must be zero for meta bio */
+	WARM,
+	COLD,
+	NR_TEMP_TYPE,	/* count of the values above; used as a loop bound */
+};
+
+enum need_lock_type {	/* values of f2fs_io_info.need_lock */
+	LOCK_REQ = 0,	/* do_write_data_page() trylocks cp_rwsem and unlocks it */
+	LOCK_DONE,	/* do_write_data_page() neither locks nor unlocks cp_rwsem */
+	LOCK_RETRY,	/* trylock only once the in-place update path is ruled out */
+};
+
 struct f2fs_io_info {
 	struct f2fs_sb_info *sbi;	/* f2fs_sb_info pointer */
 	enum page_type type;	/* contains DATA/NODE/META/META_FLUSH */
+	enum temp_type temp;	/* contains HOT/WARM/COLD */
 	int op;			/* contains REQ_OP_ */
 	int op_flags;		/* req_flag_bits */
 	block_t new_blkaddr;	/* new block address to be written */
 	block_t old_blkaddr;	/* old block address before Cow */
 	struct page *page;	/* page to be written */
 	struct page *encrypted_page;	/* encrypted page */
+	struct list_head list;		/* serialize IOs */
 	bool submitted;		/* indicate IO submission */
-	bool need_lock;		/* indicate we need to lock cp_rwsem */
+	int need_lock;		/* indicate we need to lock cp_rwsem */
+	bool in_list;		/* indicate fio is in io_list */
 };
 
 #define is_read_io(rw) ((rw) == READ)
@@ -812,6 +846,8 @@ struct f2fs_bio_info {
 	sector_t last_block_in_bio;	/* last block number */
 	struct f2fs_io_info fio;	/* store buffered io info. */
 	struct rw_semaphore io_rwsem;	/* blocking op for bio */
+	spinlock_t io_lock;		/* serialize DATA/NODE IOs */
+	struct list_head io_list;	/* track fios */
 };
 
 #define FDEV(i)				(sbi->devs[i])
@@ -879,9 +915,9 @@ struct f2fs_sb_info {
 	struct f2fs_sm_info *sm_info;		/* segment manager */
 
 	/* for bio operations */
-	struct f2fs_bio_info read_io;			/* for read bios */
-	struct f2fs_bio_info write_io[NR_PAGE_TYPE];	/* for write bios */
-	struct mutex wio_mutex[NODE + 1];	/* bio ordering for NODE/DATA */
+	struct f2fs_bio_info *write_io[NR_PAGE_TYPE];	/* for write bios */
+	struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE];
+						/* bio ordering for NODE/DATA */
 	int write_io_size_bits;			/* Write IO size bits */
 	mempool_t *write_io_dummy;		/* Dummy pages */
 
@@ -939,6 +975,8 @@ struct f2fs_sb_info {
 	block_t total_valid_block_count;	/* # of valid blocks */
 	block_t discard_blks;			/* discard command candidats */
 	block_t last_valid_block_count;		/* for recovery */
+	block_t reserved_blocks;		/* configurable reserved blocks */
+
 	u32 s_next_generation;			/* for NFS support */
 
 	/* # of pages, see count_type */
@@ -1228,9 +1266,11 @@ static inline void __set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
 
 static inline void set_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
 {
-	spin_lock(&sbi->cp_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&sbi->cp_lock, flags);
 	__set_ckpt_flags(F2FS_CKPT(sbi), f);
-	spin_unlock(&sbi->cp_lock);
+	spin_unlock_irqrestore(&sbi->cp_lock, flags);
 }
 
 static inline void __clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
@@ -1244,22 +1284,26 @@ static inline void __clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f
 
 static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
 {
-	spin_lock(&sbi->cp_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&sbi->cp_lock, flags);
 	__clear_ckpt_flags(F2FS_CKPT(sbi), f);
-	spin_unlock(&sbi->cp_lock);
+	spin_unlock_irqrestore(&sbi->cp_lock, flags);
 }
 
 static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
 {
+	unsigned long flags;
+
 	set_sbi_flag(sbi, SBI_NEED_FSCK);
 
 	if (lock)
-		spin_lock(&sbi->cp_lock);
+		spin_lock_irqsave(&sbi->cp_lock, flags);
 	__clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
 	kfree(NM_I(sbi)->nat_bits);
 	NM_I(sbi)->nat_bits = NULL;
 	if (lock)
-		spin_unlock(&sbi->cp_lock);
+		spin_unlock_irqrestore(&sbi->cp_lock, flags);
 }
 
 static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi,
@@ -1275,6 +1319,11 @@ static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
 	down_read(&sbi->cp_rwsem);
 }
 
+static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi)
+{
+	return down_read_trylock(&sbi->cp_rwsem);
+}
+
 static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
 {
 	up_read(&sbi->cp_rwsem);
@@ -1324,17 +1373,14 @@ static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
 	return 0;
 }
 
-#define F2FS_DEFAULT_ALLOCATED_BLOCKS	1
-
 /*
  * Check whether the inode has blocks or not
  */
 static inline int F2FS_HAS_BLOCKS(struct inode *inode)
 {
-	if (F2FS_I(inode)->i_xattr_nid)
-		return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1;
-	else
-		return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS;
+	block_t xattr_block = F2FS_I(inode)->i_xattr_nid ? 1 : 0;
+
+	return (inode->i_blocks >> F2FS_LOG_SECTORS_PER_BLOCK) > xattr_block;
 }
 
 static inline bool f2fs_has_xattr_block(unsigned int ofs)
@@ -1342,16 +1388,23 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs)
 	return ofs == XATTR_NODE_OFFSET;
 }
 
-static inline void f2fs_i_blocks_write(struct inode *, blkcnt_t, bool);
-static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
+static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
+static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 				 struct inode *inode, blkcnt_t *count)
 {
-	blkcnt_t diff;
+	blkcnt_t diff = 0, release = 0;
+	block_t avail_user_block_count;
+	int ret;
+
+	ret = dquot_reserve_block(inode, *count);
+	if (ret)
+		return ret;
 
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 	if (time_to_inject(sbi, FAULT_BLOCK)) {
 		f2fs_show_injection_info(FAULT_BLOCK);
-		return false;
+		release = *count;
+		goto enospc;
 	}
 #endif
 	/*
@@ -1362,32 +1415,42 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
 
 	spin_lock(&sbi->stat_lock);
 	sbi->total_valid_block_count += (block_t)(*count);
-	if (unlikely(sbi->total_valid_block_count > sbi->user_block_count)) {
-		diff = sbi->total_valid_block_count - sbi->user_block_count;
+	avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks;
+	if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
+		diff = sbi->total_valid_block_count - avail_user_block_count;
 		*count -= diff;
-		sbi->total_valid_block_count = sbi->user_block_count;
+		release = diff;
+		sbi->total_valid_block_count = avail_user_block_count;
 		if (!*count) {
 			spin_unlock(&sbi->stat_lock);
 			percpu_counter_sub(&sbi->alloc_valid_block_count, diff);
-			return false;
+			goto enospc;
 		}
 	}
 	spin_unlock(&sbi->stat_lock);
 
-	f2fs_i_blocks_write(inode, *count, true);
-	return true;
+	if (release)
+		dquot_release_reservation_block(inode, release);
+	f2fs_i_blocks_write(inode, *count, true, true);
+	return 0;
+
+enospc:
+	dquot_release_reservation_block(inode, release);
+	return -ENOSPC;
 }
 
 static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
 						struct inode *inode,
-						blkcnt_t count)
+						block_t count)
 {
+	blkcnt_t sectors = count << F2FS_LOG_SECTORS_PER_BLOCK;
+
 	spin_lock(&sbi->stat_lock);
 	f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
-	f2fs_bug_on(sbi, inode->i_blocks < count);
+	f2fs_bug_on(sbi, inode->i_blocks < sectors);
 	sbi->total_valid_block_count -= (block_t)count;
 	spin_unlock(&sbi->stat_lock);
-	f2fs_i_blocks_write(inode, count, false);
+	f2fs_i_blocks_write(inode, count, false, true);
 }
 
 static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
@@ -1516,51 +1579,70 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
 	return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
 }
 
-static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
-						struct inode *inode)
+static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
+					struct inode *inode, bool is_inode)
 {
 	block_t	valid_block_count;
 	unsigned int valid_node_count;
+	bool quota = inode && !is_inode;
+
+	if (quota) {
+		int ret = dquot_reserve_block(inode, 1);
+		if (ret)
+			return ret;
+	}
 
 	spin_lock(&sbi->stat_lock);
 
 	valid_block_count = sbi->total_valid_block_count + 1;
-	if (unlikely(valid_block_count > sbi->user_block_count)) {
+	if (unlikely(valid_block_count + sbi->reserved_blocks >
+						sbi->user_block_count)) {
 		spin_unlock(&sbi->stat_lock);
-		return false;
+		goto enospc;
 	}
 
 	valid_node_count = sbi->total_valid_node_count + 1;
 	if (unlikely(valid_node_count > sbi->total_node_count)) {
 		spin_unlock(&sbi->stat_lock);
-		return false;
+		goto enospc;
 	}
 
-	if (inode)
-		f2fs_i_blocks_write(inode, 1, true);
-
 	sbi->total_valid_node_count++;
 	sbi->total_valid_block_count++;
 	spin_unlock(&sbi->stat_lock);
 
+	if (inode) {
+		if (is_inode)
+			f2fs_mark_inode_dirty_sync(inode, true);
+		else
+			f2fs_i_blocks_write(inode, 1, true, true);
+	}
+
 	percpu_counter_inc(&sbi->alloc_valid_block_count);
-	return true;
+	return 0;
+
+enospc:
+	if (quota)
+		dquot_release_reservation_block(inode, 1);
+	return -ENOSPC;
 }
 
 static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
-						struct inode *inode)
+					struct inode *inode, bool is_inode)
 {
 	spin_lock(&sbi->stat_lock);
 
 	f2fs_bug_on(sbi, !sbi->total_valid_block_count);
 	f2fs_bug_on(sbi, !sbi->total_valid_node_count);
-	f2fs_bug_on(sbi, !inode->i_blocks);
+	f2fs_bug_on(sbi, !is_inode && !inode->i_blocks);
 
-	f2fs_i_blocks_write(inode, 1, false);
 	sbi->total_valid_node_count--;
 	sbi->total_valid_block_count--;
 
 	spin_unlock(&sbi->stat_lock);
+
+	if (!is_inode)
+		f2fs_i_blocks_write(inode, 1, false, true);
 }
 
 static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
@@ -1835,13 +1917,21 @@ static inline void f2fs_i_links_write(struct inode *inode, bool inc)
 }
 
 static inline void f2fs_i_blocks_write(struct inode *inode,
-					blkcnt_t diff, bool add)
+					block_t diff, bool add, bool claim)
 {
 	bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE);
 	bool recover = is_inode_flag_set(inode, FI_AUTO_RECOVER);
 
-	inode->i_blocks = add ? inode->i_blocks + diff :
-				inode->i_blocks - diff;
+	/* add = 1, claim = 1 should be dquot_reserve_block in pair */
+	if (add) {
+		if (claim)
+			dquot_claim_block(inode, diff);
+		else
+			dquot_alloc_block_nofail(inode, diff);
+	} else {
+		dquot_free_block(inode, diff);
+	}
+
 	f2fs_mark_inode_dirty_sync(inode, true);
 	if (clean || recover)
 		set_inode_flag(inode, FI_AUTO_RECOVER);
@@ -2236,6 +2326,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
 void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
 bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
 void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new);
+void stop_discard_thread(struct f2fs_sb_info *sbi);
 void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
 void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
 void release_discard_addrs(struct f2fs_sb_info *sbi);
@@ -2258,7 +2349,8 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
 			bool recover_newaddr);
 void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 			block_t old_blkaddr, block_t *new_blkaddr,
-			struct f2fs_summary *sum, int type);
+			struct f2fs_summary *sum, int type,
+			struct f2fs_io_info *fio, bool add_list);
 void f2fs_wait_on_page_writeback(struct page *page,
 			enum page_type type, bool ordered);
 void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
@@ -2308,14 +2400,13 @@ void destroy_checkpoint_caches(void);
 /*
  * data.c
  */
-void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
-			int rw);
-void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
+void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
+void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
 				struct inode *inode, nid_t ino, pgoff_t idx,
-				enum page_type type, int rw);
-void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi);
+				enum page_type type);
+void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi);
 int f2fs_submit_page_bio(struct f2fs_io_info *fio);
-int f2fs_submit_page_mbio(struct f2fs_io_info *fio);
+int f2fs_submit_page_write(struct f2fs_io_info *fio);
 struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
 			block_t blk_addr, struct bio *bio);
 int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr);
@@ -2633,6 +2724,14 @@ int __init create_extent_cache(void);
 void destroy_extent_cache(void);
 
 /*
+ * sysfs.c
+ */
+int __init f2fs_register_sysfs(void);
+void f2fs_unregister_sysfs(void);
+int f2fs_init_sysfs(struct f2fs_sb_info *sbi);
+void f2fs_exit_sysfs(struct f2fs_sb_info *sbi);
+
+/*
  * crypto support
  */
 static inline bool f2fs_encrypted_inode(struct inode *inode)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 61af721..a0e6d2c6 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c

@@ -33,6 +33,18 @@
 #include "trace.h"
 #include <trace/events/f2fs.h>
 
+static int f2fs_filemap_fault(struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vmf->vma->vm_file);
+	int err;
+
+	down_read(&F2FS_I(inode)->i_mmap_sem);
+	err = filemap_fault(vmf);
+	up_read(&F2FS_I(inode)->i_mmap_sem);
+
+	return err;
+}
+
 static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 {
 	struct page *page = vmf->page;
@@ -59,13 +71,14 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 	f2fs_balance_fs(sbi, dn.node_changed);
 
 	file_update_time(vmf->vma->vm_file);
+	down_read(&F2FS_I(inode)->i_mmap_sem);
 	lock_page(page);
 	if (unlikely(page->mapping != inode->i_mapping ||
 			page_offset(page) > i_size_read(inode) ||
 			!PageUptodate(page))) {
 		unlock_page(page);
 		err = -EFAULT;
-		goto out;
+		goto out_sem;
 	}
 
 	/*
@@ -94,6 +107,8 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
 		f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
 
+out_sem:
+	up_read(&F2FS_I(inode)->i_mmap_sem);
 out:
 	sb_end_pagefault(inode->i_sb);
 	f2fs_update_time(sbi, REQ_TIME);
@@ -101,7 +116,7 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 }
 
 static const struct vm_operations_struct f2fs_file_vm_ops = {
-	.fault		= filemap_fault,
+	.fault		= f2fs_filemap_fault,
 	.map_pages	= filemap_map_pages,
 	.page_mkwrite	= f2fs_vm_page_mkwrite,
 };
@@ -415,14 +430,6 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	struct inode *inode = file_inode(file);
 	int err;
 
-	if (f2fs_encrypted_inode(inode)) {
-		err = fscrypt_get_encryption_info(inode);
-		if (err)
-			return 0;
-		if (!f2fs_encrypted_inode(inode))
-			return -ENOKEY;
-	}
-
 	/* we don't need to use inline_data strictly */
 	err = f2fs_convert_inline_inode(inode);
 	if (err)
@@ -435,11 +442,10 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
 
 static int f2fs_file_open(struct inode *inode, struct file *filp)
 {
-	int ret = generic_file_open(inode, filp);
 	struct dentry *dir;
 
-	if (!ret && f2fs_encrypted_inode(inode)) {
-		ret = fscrypt_get_encryption_info(inode);
+	if (f2fs_encrypted_inode(inode)) {
+		int ret = fscrypt_get_encryption_info(inode);
 		if (ret)
 			return -EACCES;
 		if (!fscrypt_has_encryption_key(inode))
@@ -452,7 +458,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
 		return -EPERM;
 	}
 	dput(dir);
-	return ret;
+	return dquot_file_open(inode, filp);
 }
 
 int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
@@ -527,8 +533,10 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
 truncate_out:
 	f2fs_wait_on_page_writeback(page, DATA, true);
 	zero_user(page, offset, PAGE_SIZE - offset);
-	if (!cache_only || !f2fs_encrypted_inode(inode) ||
-					!S_ISREG(inode->i_mode))
+
+	/* An encrypted inode should have a key and truncate the last page. */
+	f2fs_bug_on(F2FS_I_SB(inode), cache_only && f2fs_encrypted_inode(inode));
+	if (!cache_only)
 		set_page_dirty(page);
 	f2fs_put_page(page, 1);
 	return 0;
@@ -633,11 +641,31 @@ int f2fs_truncate(struct inode *inode)
 }
 
 int f2fs_getattr(const struct path *path, struct kstat *stat,
-		 u32 request_mask, unsigned int flags)
+		 u32 request_mask, unsigned int query_flags)
 {
 	struct inode *inode = d_inode(path->dentry);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	unsigned int flags;
+
+	flags = fi->i_flags & FS_FL_USER_VISIBLE;
+	if (flags & FS_APPEND_FL)
+		stat->attributes |= STATX_ATTR_APPEND;
+	if (flags & FS_COMPR_FL)
+		stat->attributes |= STATX_ATTR_COMPRESSED;
+	if (f2fs_encrypted_inode(inode))
+		stat->attributes |= STATX_ATTR_ENCRYPTED;
+	if (flags & FS_IMMUTABLE_FL)
+		stat->attributes |= STATX_ATTR_IMMUTABLE;
+	if (flags & FS_NODUMP_FL)
+		stat->attributes |= STATX_ATTR_NODUMP;
+
+	stat->attributes_mask |= (STATX_ATTR_APPEND |
+				  STATX_ATTR_COMPRESSED |
+				  STATX_ATTR_ENCRYPTED |
+				  STATX_ATTR_IMMUTABLE |
+				  STATX_ATTR_NODUMP);
+
 	generic_fillattr(inode, stat);
-	stat->blocks <<= 3;
 	return 0;
 }
 
@@ -681,14 +709,34 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (err)
 		return err;
 
+	if (is_quota_modification(inode, attr)) {
+		err = dquot_initialize(inode);
+		if (err)
+			return err;
+	}
+	if ((attr->ia_valid & ATTR_UID &&
+		!uid_eq(attr->ia_uid, inode->i_uid)) ||
+		(attr->ia_valid & ATTR_GID &&
+		!gid_eq(attr->ia_gid, inode->i_gid))) {
+		err = dquot_transfer(inode, attr);
+		if (err)
+			return err;
+	}
+
 	if (attr->ia_valid & ATTR_SIZE) {
-		if (f2fs_encrypted_inode(inode) &&
-				fscrypt_get_encryption_info(inode))
-			return -EACCES;
+		if (f2fs_encrypted_inode(inode)) {
+			err = fscrypt_get_encryption_info(inode);
+			if (err)
+				return err;
+			if (!fscrypt_has_encryption_key(inode))
+				return -ENOKEY;
+		}
 
 		if (attr->ia_size <= i_size_read(inode)) {
+			down_write(&F2FS_I(inode)->i_mmap_sem);
 			truncate_setsize(inode, attr->ia_size);
 			err = f2fs_truncate(inode);
+			up_write(&F2FS_I(inode)->i_mmap_sem);
 			if (err)
 				return err;
 		} else {
@@ -696,7 +744,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
 			 * do not trim all blocks after i_size if target size is
 			 * larger than i_size.
 			 */
+			down_write(&F2FS_I(inode)->i_mmap_sem);
 			truncate_setsize(inode, attr->ia_size);
+			up_write(&F2FS_I(inode)->i_mmap_sem);
 
 			/* should convert inline inode here */
 			if (!f2fs_may_inline_data(inode)) {
@@ -839,12 +889,14 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
 
 			blk_start = (loff_t)pg_start << PAGE_SHIFT;
 			blk_end = (loff_t)pg_end << PAGE_SHIFT;
+			down_write(&F2FS_I(inode)->i_mmap_sem);
 			truncate_inode_pages_range(mapping, blk_start,
 					blk_end - 1);
 
 			f2fs_lock_op(sbi);
 			ret = truncate_hole(inode, pg_start, pg_end);
 			f2fs_unlock_op(sbi);
+			up_write(&F2FS_I(inode)->i_mmap_sem);
 		}
 	}
 
@@ -957,9 +1009,9 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
 
 				if (do_replace[i]) {
 					f2fs_i_blocks_write(src_inode,
-								1, false);
+							1, false, false);
 					f2fs_i_blocks_write(dst_inode,
-								1, true);
+							1, true, false);
 					f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
 					blkaddr[i], ni.version, true, false);
 
@@ -1083,16 +1135,17 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	pg_start = offset >> PAGE_SHIFT;
 	pg_end = (offset + len) >> PAGE_SHIFT;
 
+	down_write(&F2FS_I(inode)->i_mmap_sem);
 	/* write out all dirty pages from offset */
 	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
 	if (ret)
-		return ret;
+		goto out;
 
 	truncate_pagecache(inode, offset);
 
 	ret = f2fs_do_collapse(inode, pg_start, pg_end);
 	if (ret)
-		return ret;
+		goto out;
 
 	/* write out all moved pages, if possible */
 	filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
@@ -1105,6 +1158,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	if (!ret)
 		f2fs_i_size_write(inode, new_size);
 
+out:
+	up_write(&F2FS_I(inode)->i_mmap_sem);
 	return ret;
 }
 
@@ -1169,9 +1224,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
 	if (ret)
 		return ret;
 
+	down_write(&F2FS_I(inode)->i_mmap_sem);
 	ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
 	if (ret)
-		return ret;
+		goto out_sem;
 
 	truncate_pagecache_range(inode, offset, offset + len - 1);
 
@@ -1185,7 +1241,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
 		ret = fill_zero(inode, pg_start, off_start,
 						off_end - off_start);
 		if (ret)
-			return ret;
+			goto out_sem;
 
 		new_size = max_t(loff_t, new_size, offset + len);
 	} else {
@@ -1193,7 +1249,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
 			ret = fill_zero(inode, pg_start++, off_start,
 						PAGE_SIZE - off_start);
 			if (ret)
-				return ret;
+				goto out_sem;
 
 			new_size = max_t(loff_t, new_size,
 					(loff_t)pg_start << PAGE_SHIFT);
@@ -1242,6 +1298,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
 out:
 	if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size)
 		f2fs_i_size_write(inode, new_size);
+out_sem:
+	up_write(&F2FS_I(inode)->i_mmap_sem);
 
 	return ret;
 }
@@ -1271,14 +1329,15 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
 
 	f2fs_balance_fs(sbi, true);
 
+	down_write(&F2FS_I(inode)->i_mmap_sem);
 	ret = truncate_blocks(inode, i_size_read(inode), true);
 	if (ret)
-		return ret;
+		goto out;
 
 	/* write out all dirty pages from offset */
 	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
 	if (ret)
-		return ret;
+		goto out;
 
 	truncate_pagecache(inode, offset);
 
@@ -1307,6 +1366,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
 
 	if (!ret)
 		f2fs_i_size_write(inode, new_size);
+out:
+	up_write(&F2FS_I(inode)->i_mmap_sem);
 	return ret;
 }
 
@@ -1475,6 +1536,13 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
 
 	inode_lock(inode);
 
+	/* Is it quota file? Do not allow user to mess with it */
+	if (IS_NOQUOTA(inode)) {
+		/* inode lock is dropped at unlock_out; do not unlock twice */
+		ret = -EPERM;
+		goto unlock_out;
+	}
+
 	flags = f2fs_mask_flags(inode->i_mode, flags);
 
 	oldflags = fi->i_flags;
@@ -1493,7 +1561,8 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
 
 	inode->i_ctime = current_time(inode);
 	f2fs_set_inode_flags(inode);
-
+	f2fs_mark_inode_dirty_sync(inode, false);
+unlock_out:
 	inode_unlock(inode);
 out:
 	mnt_drop_write_file(filp);
@@ -1862,6 +1931,50 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
 	return ret;
 }
 
+/* GC every segment in the user-supplied block range; CAP_SYS_ADMIN only. */
+static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
+	struct f2fs_gc_range range;
+	u64 end;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
+							sizeof(range)))
+		return -EFAULT;
+
+	if (f2fs_readonly(sbi->sb))
+		return -EROFS;
+
+	/* Reject wrapped/out-of-area ranges before taking write access. */
+	end = range.start + range.len;
+	if (end < range.start || range.start < MAIN_BLKADDR(sbi) ||
+					end >= MAX_BLKADDR(sbi))
+		return -EINVAL;
+
+	ret = mnt_want_write_file(filp);
+	if (ret)
+		return ret;
+do_more:
+	if (range.sync) {
+		mutex_lock(&sbi->gc_mutex);
+	} else if (!mutex_trylock(&sbi->gc_mutex)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start));
+	range.start += sbi->blocks_per_seg;
+	if (range.start <= end)
+		goto do_more;
+out:
+	mnt_drop_write_file(filp);
+	return ret;
+}
+
 static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
@@ -2306,6 +2419,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return f2fs_ioc_get_encryption_pwsalt(filp, arg);
 	case F2FS_IOC_GARBAGE_COLLECT:
 		return f2fs_ioc_gc(filp, arg);
+	case F2FS_IOC_GARBAGE_COLLECT_RANGE:
+		return f2fs_ioc_gc_range(filp, arg);
 	case F2FS_IOC_WRITE_CHECKPOINT:
 		return f2fs_ioc_write_checkpoint(filp, arg);
 	case F2FS_IOC_DEFRAGMENT:
@@ -2326,11 +2441,6 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct blk_plug plug;
 	ssize_t ret;
 
-	if (f2fs_encrypted_inode(inode) &&
-				!fscrypt_has_encryption_key(inode) &&
-				fscrypt_get_encryption_info(inode))
-		return -EACCES;
-
 	inode_lock(inode);
 	ret = generic_write_checks(iocb, from);
 	if (ret > 0) {
@@ -2379,6 +2489,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case F2FS_IOC_GET_ENCRYPTION_PWSALT:
 	case F2FS_IOC_GET_ENCRYPTION_POLICY:
 	case F2FS_IOC_GARBAGE_COLLECT:
+	case F2FS_IOC_GARBAGE_COLLECT_RANGE:
 	case F2FS_IOC_WRITE_CHECKPOINT:
 	case F2FS_IOC_DEFRAGMENT:
 	case F2FS_IOC_MOVE_RANGE:

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 0265221..fa3d2e2 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c

@@ -32,13 +32,14 @@ static int gc_thread_func(void *data)
 
 	wait_ms = gc_th->min_sleep_time;
 
+	set_freezable();
 	do {
+		wait_event_interruptible_timeout(*wq,
+				kthread_should_stop() || freezing(current),
+				msecs_to_jiffies(wait_ms));
+
 		if (try_to_freeze())
 			continue;
-		else
-			wait_event_interruptible_timeout(*wq,
-						kthread_should_stop(),
-						msecs_to_jiffies(wait_ms));
 		if (kthread_should_stop())
 			break;
 
@@ -258,11 +259,20 @@ static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi,
 				valid_blocks * 2 : valid_blocks;
 }
 
+static unsigned int get_ssr_cost(struct f2fs_sb_info *sbi,
+						unsigned int segno)
+{
+	struct seg_entry *se = get_seg_entry(sbi, segno);
+
+	return se->ckpt_valid_blocks > se->valid_blocks ?
+				se->ckpt_valid_blocks : se->valid_blocks;
+}
+
 static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
 			unsigned int segno, struct victim_sel_policy *p)
 {
 	if (p->alloc_mode == SSR)
-		return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+		return get_ssr_cost(sbi, segno);
 
 	/* alloc_mode == LFS */
 	if (p->gc_mode == GC_GREEDY)
@@ -586,9 +596,11 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
 	struct f2fs_io_info fio = {
 		.sbi = F2FS_I_SB(inode),
 		.type = DATA,
+		.temp = COLD,
 		.op = REQ_OP_READ,
 		.op_flags = 0,
 		.encrypted_page = NULL,
+		.in_list = false,
 	};
 	struct dnode_of_data dn;
 	struct f2fs_summary sum;
@@ -632,7 +644,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
 	fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
 
 	allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
-							&sum, CURSEG_COLD_DATA);
+					&sum, CURSEG_COLD_DATA, NULL, false);
 
 	fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr,
 					FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -670,7 +682,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
 	fio.op = REQ_OP_WRITE;
 	fio.op_flags = REQ_SYNC;
 	fio.new_blkaddr = newaddr;
-	f2fs_submit_page_mbio(&fio);
+	f2fs_submit_page_write(&fio);
 
 	f2fs_update_data_blkaddr(&dn, newaddr);
 	set_inode_flag(inode, FI_APPEND_WRITE);
@@ -712,12 +724,13 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
 		struct f2fs_io_info fio = {
 			.sbi = F2FS_I_SB(inode),
 			.type = DATA,
+			.temp = COLD,
 			.op = REQ_OP_WRITE,
 			.op_flags = REQ_SYNC,
 			.old_blkaddr = NULL_ADDR,
 			.page = page,
 			.encrypted_page = NULL,
-			.need_lock = true,
+			.need_lock = LOCK_REQ,
 		};
 		bool is_dirty = PageDirty(page);
 		int err;
@@ -936,8 +949,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
 	}
 
 	if (gc_type == FG_GC)
-		f2fs_submit_merged_bio(sbi,
-				(type == SUM_TYPE_NODE) ? NODE : DATA, WRITE);
+		f2fs_submit_merged_write(sbi,
+				(type == SUM_TYPE_NODE) ? NODE : DATA);
 
 	blk_finish_plug(&plug);
 
@@ -955,7 +968,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
 {
 	int gc_type = sync ? FG_GC : BG_GC;
 	int sec_freed = 0;
-	int ret = -EINVAL;
+	int ret;
 	struct cp_control cpc;
 	unsigned int init_segno = segno;
 	struct gc_inode_list gc_list = {
@@ -965,8 +978,10 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
 
 	cpc.reason = __get_cp_reason(sbi);
 gc_more:
-	if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
+	if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) {
+		ret = -EINVAL;
 		goto stop;
+	}
 	if (unlikely(f2fs_cp_error(sbi))) {
 		ret = -EIO;
 		goto stop;
@@ -987,6 +1002,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
 			gc_type = FG_GC;
 	}
 
+	ret = -EINVAL;
 	/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
 	if (gc_type == BG_GC && !background)
 		goto stop;

diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index e4c527c..e0fd437 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c

@@ -316,12 +316,12 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
 int make_empty_inline_dir(struct inode *inode, struct inode *parent,
 							struct page *ipage)
 {
-	struct f2fs_inline_dentry *dentry_blk;
+	struct f2fs_inline_dentry *inline_dentry;
 	struct f2fs_dentry_ptr d;
 
-	dentry_blk = inline_data_addr(ipage);
+	inline_dentry = inline_data_addr(ipage);
 
-	make_dentry_ptr_inline(NULL, &d, dentry_blk);
+	make_dentry_ptr_inline(NULL, &d, inline_dentry);
 	do_make_empty_dir(inode, parent, &d);
 
 	set_page_dirty(ipage);
@@ -500,7 +500,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
 	struct page *ipage;
 	unsigned int bit_pos;
 	f2fs_hash_t name_hash;
-	struct f2fs_inline_dentry *dentry_blk = NULL;
+	struct f2fs_inline_dentry *inline_dentry = NULL;
 	struct f2fs_dentry_ptr d;
 	int slots = GET_DENTRY_SLOTS(new_name->len);
 	struct page *page = NULL;
@@ -510,11 +510,11 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
 	if (IS_ERR(ipage))
 		return PTR_ERR(ipage);
 
-	dentry_blk = inline_data_addr(ipage);
-	bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
+	inline_dentry = inline_data_addr(ipage);
+	bit_pos = room_for_filename(&inline_dentry->dentry_bitmap,
 						slots, NR_INLINE_DENTRY);
 	if (bit_pos >= NR_INLINE_DENTRY) {
-		err = f2fs_convert_inline_dir(dir, ipage, dentry_blk);
+		err = f2fs_convert_inline_dir(dir, ipage, inline_dentry);
 		if (err)
 			return err;
 		err = -EAGAIN;
@@ -534,7 +534,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
 	f2fs_wait_on_page_writeback(ipage, NODE, true);
 
 	name_hash = f2fs_dentry_hash(new_name, NULL);
-	make_dentry_ptr_inline(NULL, &d, dentry_blk);
+	make_dentry_ptr_inline(NULL, &d, inline_dentry);
 	f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
 
 	set_page_dirty(ipage);
@@ -586,14 +586,14 @@ bool f2fs_empty_inline_dir(struct inode *dir)
 	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
 	struct page *ipage;
 	unsigned int bit_pos = 2;
-	struct f2fs_inline_dentry *dentry_blk;
+	struct f2fs_inline_dentry *inline_dentry;
 
 	ipage = get_node_page(sbi, dir->i_ino);
 	if (IS_ERR(ipage))
 		return false;
 
-	dentry_blk = inline_data_addr(ipage);
-	bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+	inline_dentry = inline_data_addr(ipage);
+	bit_pos = find_next_bit_le(&inline_dentry->dentry_bitmap,
 					NR_INLINE_DENTRY,
 					bit_pos);
 

diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 518f496..6cd312a 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c

@@ -16,6 +16,7 @@
 
 #include "f2fs.h"
 #include "node.h"
+#include "segment.h"
 
 #include <trace/events/f2fs.h>
 
@@ -44,7 +45,6 @@ void f2fs_set_inode_flags(struct inode *inode)
 		new_fl |= S_DIRSYNC;
 	inode_set_flags(inode, new_fl,
 			S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
-	f2fs_mark_inode_dirty_sync(inode, false);
 }
 
 static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -130,7 +130,7 @@ static int do_read_inode(struct inode *inode)
 	i_gid_write(inode, le32_to_cpu(ri->i_gid));
 	set_nlink(inode, le32_to_cpu(ri->i_links));
 	inode->i_size = le64_to_cpu(ri->i_size);
-	inode->i_blocks = le64_to_cpu(ri->i_blocks);
+	inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks) - 1);
 
 	inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime);
 	inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime);
@@ -226,6 +226,7 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
 		ret = -EIO;
 		goto bad_inode;
 	}
+	f2fs_set_inode_flags(inode);
 	unlock_new_inode(inode);
 	trace_f2fs_iget(inode);
 	return inode;
@@ -267,7 +268,7 @@ int update_inode(struct inode *inode, struct page *node_page)
 	ri->i_gid = cpu_to_le32(i_gid_read(inode));
 	ri->i_links = cpu_to_le32(inode->i_nlink);
 	ri->i_size = cpu_to_le64(i_size_read(inode));
-	ri->i_blocks = cpu_to_le64(inode->i_blocks);
+	ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1);
 
 	if (et) {
 		read_lock(&et->lock);
@@ -372,6 +373,8 @@ void f2fs_evict_inode(struct inode *inode)
 	if (inode->i_nlink || is_bad_inode(inode))
 		goto no_delete;
 
+	dquot_initialize(inode);
+
 	remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
 	remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
 
@@ -404,8 +407,11 @@ void f2fs_evict_inode(struct inode *inode)
 
 	if (err)
 		update_inode_page(inode);
+	dquot_free_inode(inode);
 	sb_end_intwrite(inode->i_sb);
 no_delete:
+	dquot_drop(inode);
+
 	stat_dec_inline_xattr(inode);
 	stat_dec_inline_dir(inode);
 	stat_dec_inline_inode(inode);
@@ -425,9 +431,10 @@ void f2fs_evict_inode(struct inode *inode)
 	if (is_inode_flag_set(inode, FI_FREE_NID)) {
 		alloc_nid_failed(sbi, inode->i_ino);
 		clear_inode_flag(inode, FI_FREE_NID);
+	} else {
+		f2fs_bug_on(sbi, err &&
+			!exist_written_data(sbi, inode->i_ino, ORPHAN_INO));
 	}
-	f2fs_bug_on(sbi, err &&
-		!exist_written_data(sbi, inode->i_ino, ORPHAN_INO));
 out_clear:
 	fscrypt_put_encryption_info(inode, NULL);
 	clear_inode(inode);

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index c31b40e..760d852 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c

@@ -15,6 +15,7 @@
 #include <linux/ctype.h>
 #include <linux/dcache.h>
 #include <linux/namei.h>
+#include <linux/quotaops.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -42,6 +43,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
 	}
 	f2fs_unlock_op(sbi);
 
+	nid_free = true;
+
 	inode_init_owner(inode, dir, mode);
 
 	inode->i_ino = ino;
@@ -52,10 +55,17 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
 	err = insert_inode_locked(inode);
 	if (err) {
 		err = -EINVAL;
-		nid_free = true;
 		goto fail;
 	}
 
+	err = dquot_initialize(inode);
+	if (err)
+		goto fail_drop;
+
+	err = dquot_alloc_inode(inode);
+	if (err)
+		goto fail_drop;
+
 	/* If the directory encrypted, then we should encrypt the inode. */
 	if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode))
 		f2fs_set_encrypted_inode(inode);
@@ -85,6 +95,16 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
 		set_inode_flag(inode, FI_FREE_NID);
 	iput(inode);
 	return ERR_PTR(err);
+fail_drop:
+	trace_f2fs_new_inode(inode, err);
+	dquot_drop(inode);
+	inode->i_flags |= S_NOQUOTA;
+	if (nid_free)
+		set_inode_flag(inode, FI_FREE_NID);
+	clear_nlink(inode);
+	unlock_new_inode(inode);
+	iput(inode);
+	return ERR_PTR(err);
 }
 
 static int is_multimedia_file(const unsigned char *s, const char *sub)
@@ -136,6 +156,10 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 	nid_t ino = 0;
 	int err;
 
+	err = dquot_initialize(dir);
+	if (err)
+		return err;
+
 	inode = f2fs_new_inode(dir, mode);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
@@ -180,6 +204,10 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
 			!fscrypt_has_permitted_context(dir, inode))
 		return -EPERM;
 
+	err = dquot_initialize(dir);
+	if (err)
+		return err;
+
 	f2fs_balance_fs(sbi, true);
 
 	inode->i_ctime = current_time(inode);
@@ -347,6 +375,10 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
 
 	trace_f2fs_unlink_enter(dir, dentry);
 
+	err = dquot_initialize(dir);
+	if (err)
+		return err;
+
 	de = f2fs_find_entry(dir, &dentry->d_name, &page);
 	if (!de) {
 		if (IS_ERR(page))
@@ -413,6 +445,10 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
 	if (disk_link.len > dir->i_sb->s_blocksize)
 		return -ENAMETOOLONG;
 
+	err = dquot_initialize(dir);
+	if (err)
+		return err;
+
 	inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
@@ -500,6 +536,10 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	struct inode *inode;
 	int err;
 
+	err = dquot_initialize(dir);
+	if (err)
+		return err;
+
 	inode = f2fs_new_inode(dir, S_IFDIR | mode);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
@@ -548,6 +588,10 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
 	struct inode *inode;
 	int err = 0;
 
+	err = dquot_initialize(dir);
+	if (err)
+		return err;
+
 	inode = f2fs_new_inode(dir, mode);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
@@ -583,6 +627,10 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
 	struct inode *inode;
 	int err;
 
+	err = dquot_initialize(dir);
+	if (err)
+		return err;
+
 	inode = f2fs_new_inode(dir, mode);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
@@ -676,6 +724,14 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto out;
 	}
 
+	err = dquot_initialize(old_dir);
+	if (err)
+		goto out;
+
+	err = dquot_initialize(new_dir);
+	if (err)
+		goto out;
+
 	old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
 	if (!old_entry) {
 		if (IS_ERR(old_page))
@@ -772,7 +828,10 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	}
 
 	down_write(&F2FS_I(old_inode)->i_sem);
-	file_lost_pino(old_inode);
+	if (!old_dir_entry || whiteout)
+		file_lost_pino(old_inode);
+	else
+		F2FS_I(old_inode)->i_pino = new_dir->i_ino;
 	up_write(&F2FS_I(old_inode)->i_sem);
 
 	old_inode->i_ctime = current_time(old_inode);
@@ -853,6 +912,14 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 			 !fscrypt_has_permitted_context(old_dir, new_inode)))
 		return -EPERM;
 
+	err = dquot_initialize(old_dir);
+	if (err)
+		goto out;
+
+	err = dquot_initialize(new_dir);
+	if (err)
+		goto out;
+
 	old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
 	if (!old_entry) {
 		if (IS_ERR(old_page))

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4547c5c..d53fe62 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c

@@ -158,9 +158,6 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
 	nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
 	struct nat_entry_set *head;
 
-	if (get_nat_flag(ne, IS_DIRTY))
-		return;
-
 	head = radix_tree_lookup(&nm_i->nat_set_root, set);
 	if (!head) {
 		head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS);
@@ -171,10 +168,18 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
 		head->entry_cnt = 0;
 		f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
 	}
-	list_move_tail(&ne->list, &head->entry_list);
+
+	if (get_nat_flag(ne, IS_DIRTY))
+		goto refresh_list;
+
 	nm_i->dirty_nat_cnt++;
 	head->entry_cnt++;
 	set_nat_flag(ne, IS_DIRTY, true);
+refresh_list:
+	if (nat_get_blkaddr(ne) == NEW_ADDR)
+		list_del_init(&ne->list);
+	else
+		list_move_tail(&ne->list, &head->entry_list);
 }
 
 static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
@@ -673,15 +678,11 @@ static void truncate_node(struct dnode_of_data *dn)
 	struct node_info ni;
 
 	get_node_info(sbi, dn->nid, &ni);
-	if (dn->inode->i_blocks == 0) {
-		f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR);
-		goto invalidate;
-	}
 	f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
 
 	/* Deallocate node address */
 	invalidate_blocks(sbi, ni.blk_addr);
-	dec_valid_node_count(sbi, dn->inode);
+	dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino);
 	set_node_addr(sbi, &ni, NULL_ADDR, false);
 
 	if (dn->nid == dn->inode->i_ino) {
@@ -689,7 +690,7 @@ static void truncate_node(struct dnode_of_data *dn)
 		dec_valid_inode_count(sbi);
 		f2fs_inode_synced(dn->inode);
 	}
-invalidate:
+
 	clear_node_page_dirty(dn->node_page);
 	set_sbi_flag(sbi, SBI_IS_DIRTY);
 
@@ -1006,7 +1007,7 @@ int remove_inode_page(struct inode *inode)
 
 	/* 0 is possible, after f2fs_new_inode() has failed */
 	f2fs_bug_on(F2FS_I_SB(inode),
-			inode->i_blocks != 0 && inode->i_blocks != 1);
+			inode->i_blocks != 0 && inode->i_blocks != 8);
 
 	/* will put inode & node pages */
 	truncate_node(&dn);
@@ -1039,10 +1040,9 @@ struct page *new_node_page(struct dnode_of_data *dn,
 	if (!page)
 		return ERR_PTR(-ENOMEM);
 
-	if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {
-		err = -ENOSPC;
+	if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs))))
 		goto fail;
-	}
+
 #ifdef CONFIG_F2FS_CHECK_FS
 	get_node_info(sbi, dn->nid, &new_ni);
 	f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
@@ -1152,6 +1152,7 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
 		f2fs_put_page(page, 1);
 		return ERR_PTR(err);
 	} else if (err == LOCKED_PAGE) {
+		err = 0;
 		goto page_hit;
 	}
 
@@ -1165,15 +1166,22 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
 		goto repeat;
 	}
 
-	if (unlikely(!PageUptodate(page)))
+	if (unlikely(!PageUptodate(page))) {
+		err = -EIO;
 		goto out_err;
+	}
 page_hit:
 	if(unlikely(nid != nid_of_node(page))) {
-		f2fs_bug_on(sbi, 1);
+		f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, "
+			"nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
+			nid, nid_of_node(page), ino_of_node(page),
+			ofs_of_node(page), cpver_of_node(page),
+			next_blkaddr_of_node(page));
 		ClearPageUptodate(page);
+		err = -EINVAL;
 out_err:
 		f2fs_put_page(page, 1);
-		return ERR_PTR(-EIO);
+		return ERR_PTR(err);
 	}
 	return page;
 }
@@ -1373,15 +1381,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
 	up_read(&sbi->node_write);
 
 	if (wbc->for_reclaim) {
-		f2fs_submit_merged_bio_cond(sbi, page->mapping->host, 0,
-						page->index, NODE, WRITE);
+		f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0,
+						page->index, NODE);
 		submitted = NULL;
 	}
 
 	unlock_page(page);
 
 	if (unlikely(f2fs_cp_error(sbi))) {
-		f2fs_submit_merged_bio(sbi, NODE, WRITE);
+		f2fs_submit_merged_write(sbi, NODE);
 		submitted = NULL;
 	}
 	if (submitted)
@@ -1518,8 +1526,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
 	}
 out:
 	if (last_idx != ULONG_MAX)
-		f2fs_submit_merged_bio_cond(sbi, NULL, ino, last_idx,
-							NODE, WRITE);
+		f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE);
 	return ret ? -EIO: 0;
 }
 
@@ -1625,7 +1632,7 @@ int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc)
 	}
 out:
 	if (nwritten)
-		f2fs_submit_merged_bio(sbi, NODE, WRITE);
+		f2fs_submit_merged_write(sbi, NODE);
 	return ret;
 }
 
@@ -1675,6 +1682,9 @@ static int f2fs_write_node_pages(struct address_space *mapping,
 	struct blk_plug plug;
 	long diff;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+		goto skip_write;
+
 	/* balancing f2fs's metadata in background */
 	f2fs_balance_fs_bg(sbi);
 
@@ -2192,14 +2202,14 @@ int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
 	get_node_info(sbi, prev_xnid, &ni);
 	f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
 	invalidate_blocks(sbi, ni.blk_addr);
-	dec_valid_node_count(sbi, inode);
+	dec_valid_node_count(sbi, inode, false);
 	set_node_addr(sbi, &ni, NULL_ADDR, false);
 
 recover_xnid:
 	/* 2: update xattr nid in inode */
 	remove_free_nid(sbi, new_xnid);
 	f2fs_i_xnid_write(inode, new_xnid);
-	if (unlikely(!inc_valid_node_count(sbi, inode)))
+	if (unlikely(inc_valid_node_count(sbi, inode, false)))
 		f2fs_bug_on(sbi, 1);
 	update_inode_page(inode);
 
@@ -2257,7 +2267,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
 	new_ni = old_ni;
 	new_ni.ino = ino;
 
-	if (unlikely(!inc_valid_node_count(sbi, NULL)))
+	if (unlikely(inc_valid_node_count(sbi, NULL, true)))
 		WARN_ON(1);
 	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
 	inc_valid_inode_count(sbi);
@@ -2424,8 +2434,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 		nid_t nid = nat_get_nid(ne);
 		int offset;
 
-		if (nat_get_blkaddr(ne) == NEW_ADDR)
-			continue;
+		f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR);
 
 		if (to_journal) {
 			offset = lookup_journal_in_cursum(journal,
@@ -2553,7 +2562,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
 	return 0;
 }
 
-inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
+static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	unsigned int i = 0;

diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 558048e..bb53e99 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h

@@ -224,11 +224,7 @@ static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi,
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 
 	block_addr -= nm_i->nat_blkaddr;
-	if ((block_addr >> sbi->log_blocks_per_seg) % 2)
-		block_addr -= sbi->blocks_per_seg;
-	else
-		block_addr += sbi->blocks_per_seg;
-
+	block_addr ^= 1 << sbi->log_blocks_per_seg;
 	return block_addr + nm_i->nat_blkaddr;
 }
 

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index ea9f455..f964b68 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c

@@ -16,6 +16,7 @@
 #include <linux/kthread.h>
 #include <linux/swap.h>
 #include <linux/timer.h>
+#include <linux/freezer.h>
 
 #include "f2fs.h"
 #include "segment.h"
@@ -312,7 +313,7 @@ static int __commit_inmem_pages(struct inode *inode,
 			fio.page = page;
 			fio.old_blkaddr = NULL_ADDR;
 			fio.encrypted_page = NULL;
-			fio.need_lock = false,
+			fio.need_lock = LOCK_DONE;
 			err = do_write_data_page(&fio);
 			if (err) {
 				unlock_page(page);
@@ -328,8 +329,7 @@ static int __commit_inmem_pages(struct inode *inode,
 	}
 
 	if (last_idx != ULONG_MAX)
-		f2fs_submit_merged_bio_cond(sbi, inode, 0, last_idx,
-							DATA, WRITE);
+		f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA);
 
 	if (!err)
 		__revoke_inmem_pages(inode, revoke_list, false, false);
@@ -555,6 +555,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
 
 	if (SM_I(sbi)->fcc_info) {
 		fcc = SM_I(sbi)->fcc_info;
+		if (fcc->f2fs_issue_flush)
+			return err;
 		goto init_thread;
 	}
 
@@ -566,6 +568,9 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
 	init_waitqueue_head(&fcc->flush_wait_queue);
 	init_llist_head(&fcc->issue_list);
 	SM_I(sbi)->fcc_info = fcc;
+	if (!test_opt(sbi, FLUSH_MERGE))
+		return err;
+
 init_thread:
 	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
 				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
@@ -736,12 +741,15 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
 {
 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 
+	f2fs_bug_on(sbi, dc->ref);
+
 	if (dc->error == -EOPNOTSUPP)
 		dc->error = 0;
 
 	if (dc->error)
 		f2fs_msg(sbi->sb, KERN_INFO,
-				"Issue discard failed, ret: %d", dc->error);
+			"Issue discard(%u, %u, %u) failed, ret: %d",
+			dc->lstart, dc->start, dc->len, dc->error);
 	__detach_discard_cmd(dcc, dc);
 }
 
@@ -751,10 +759,34 @@ static void f2fs_submit_discard_endio(struct bio *bio)
 
 	dc->error = blk_status_to_errno(bio->bi_status);
 	dc->state = D_DONE;
-	complete(&dc->wait);
+	complete_all(&dc->wait);
 	bio_put(bio);
 }
 
+/*
+ * Debug-only sanity check, compiled in under CONFIG_F2FS_CHECK_FS: walk the
+ * block range [start, end) one segment at a time and trip f2fs_bug_on() if
+ * __find_rev_next_bit() finds a set bit in that segment's cur_valid_map
+ * inside the range.  Without CONFIG_F2FS_CHECK_FS this is a no-op.
+ *
+ * @sbi:   filesystem instance the range belongs to
+ * @start: first block address to check
+ * @end:   block address one past the last block to check
+ */
+void __check_sit_bitmap(struct f2fs_sb_info *sbi,
+				block_t start, block_t end)
+{
+#ifdef CONFIG_F2FS_CHECK_FS
+	struct seg_entry *sentry;
+	unsigned int segno;
+	block_t blk = start;
+	unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
+	unsigned long *map;
+
+	while (blk < end) {
+		segno = GET_SEGNO(sbi, blk);
+		sentry = get_seg_entry(sbi, segno);
+		offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
+
+		/*
+		 * size is the end bit index of the search, not a block
+		 * count, so it must include the in-segment start offset.
+		 * NOTE(review): assumes __find_rev_next_bit() follows the
+		 * find_next_bit() convention (search [offset, size), return
+		 * size when nothing is found) - confirm against its
+		 * definition.
+		 */
+		size = min(offset + (unsigned long)(end - blk), max_blocks);
+		map = (unsigned long *)(sentry->cur_valid_map);
+
+		/*
+		 * Step to the end of the range or to the start of the next
+		 * segment.  Done before the search because the search result
+		 * overwrites offset.
+		 */
+		blk += size - offset;
+		offset = __find_rev_next_bit(map, size, offset);
+		f2fs_bug_on(sbi, offset != size);
+	}
+#endif
+}
+
 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */
 static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
 				struct discard_cmd *dc)
@@ -782,6 +814,7 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
 			bio->bi_opf |= REQ_SYNC;
 			submit_bio(bio);
 			list_move_tail(&dc->list, &dcc->wait_list);
+			__check_sit_bitmap(sbi, dc->start, dc->start + dc->len);
 		}
 	} else {
 		__remove_discard_cmd(sbi, dc);
@@ -838,7 +871,6 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
 		dc->len = blkaddr - dc->lstart;
 		dcc->undiscard_blks += dc->len;
 		__relocate_discard_cmd(dcc, dc);
-		f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
 		modified = true;
 	}
 
@@ -848,16 +880,12 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
 					di.start + blkaddr + 1 - di.lstart,
 					di.lstart + di.len - 1 - blkaddr,
 					NULL, NULL);
-			f2fs_bug_on(sbi,
-				!__check_rb_tree_consistence(sbi, &dcc->root));
 		} else {
 			dc->lstart++;
 			dc->len--;
 			dc->start++;
 			dcc->undiscard_blks += dc->len;
 			__relocate_discard_cmd(dcc, dc);
-			f2fs_bug_on(sbi,
-				!__check_rb_tree_consistence(sbi, &dcc->root));
 		}
 	}
 }
@@ -918,8 +946,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
 			prev_dc->di.len += di.len;
 			dcc->undiscard_blks += di.len;
 			__relocate_discard_cmd(dcc, prev_dc);
-			f2fs_bug_on(sbi,
-				!__check_rb_tree_consistence(sbi, &dcc->root));
 			di = prev_dc->di;
 			tdc = prev_dc;
 			merged = true;
@@ -935,16 +961,12 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
 			__relocate_discard_cmd(dcc, next_dc);
 			if (tdc)
 				__remove_discard_cmd(sbi, tdc);
-			f2fs_bug_on(sbi,
-				!__check_rb_tree_consistence(sbi, &dcc->root));
 			merged = true;
 		}
 
 		if (!merged) {
 			__insert_discard_tree(sbi, bdev, di.lstart, di.start,
 							di.len, NULL, NULL);
-			f2fs_bug_on(sbi,
-				!__check_rb_tree_consistence(sbi, &dcc->root));
 		}
  next:
 		prev_dc = next_dc;
@@ -983,6 +1005,8 @@ static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond)
 	int i, iter = 0;
 
 	mutex_lock(&dcc->cmd_lock);
+	f2fs_bug_on(sbi,
+		!__check_rb_tree_consistence(sbi, &dcc->root));
 	blk_start_plug(&plug);
 	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
 		pend_list = &dcc->pend_list[i];
@@ -1000,22 +1024,47 @@ static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond)
 	mutex_unlock(&dcc->cmd_lock);
 }
 
+/*
+ * Sleep until the completion embedded in @dc is signalled, then take
+ * cmd_lock, assert that the command reached D_DONE, drop one reference on
+ * it and remove the command once its reference count hits zero.
+ */
+static void __wait_one_discard_bio(struct f2fs_sb_info *sbi,
+							struct discard_cmd *dc)
+{
+	struct discard_cmd_control *ctl = SM_I(sbi)->dcc_info;
+
+	/* Wait outside cmd_lock; the lock is only taken afterwards. */
+	wait_for_completion_io(&dc->wait);
+
+	mutex_lock(&ctl->cmd_lock);
+	f2fs_bug_on(sbi, dc->state != D_DONE);
+	if (--dc->ref == 0)
+		__remove_discard_cmd(sbi, dc);
+	mutex_unlock(&ctl->cmd_lock);
+}
+
 static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond)
 {
 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 	struct list_head *wait_list = &(dcc->wait_list);
 	struct discard_cmd *dc, *tmp;
+	bool need_wait;
+
+next:
+	need_wait = false;
 
 	mutex_lock(&dcc->cmd_lock);
 	list_for_each_entry_safe(dc, tmp, wait_list, list) {
-		if (!wait_cond || dc->state == D_DONE) {
-			if (dc->ref)
-				continue;
+		if (!wait_cond || (dc->state == D_DONE && !dc->ref)) {
 			wait_for_completion_io(&dc->wait);
 			__remove_discard_cmd(sbi, dc);
+		} else {
+			dc->ref++;
+			need_wait = true;
+			break;
 		}
 	}
 	mutex_unlock(&dcc->cmd_lock);
+
+	if (need_wait) {
+		__wait_one_discard_bio(sbi, dc);
+		goto next;
+	}
 }
 
 /* This should be covered by global mutex, &sit_i->sentry_lock */
@@ -1037,14 +1086,19 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
 	}
 	mutex_unlock(&dcc->cmd_lock);
 
-	if (need_wait) {
-		wait_for_completion_io(&dc->wait);
-		mutex_lock(&dcc->cmd_lock);
-		f2fs_bug_on(sbi, dc->state != D_DONE);
-		dc->ref--;
-		if (!dc->ref)
-			__remove_discard_cmd(sbi, dc);
-		mutex_unlock(&dcc->cmd_lock);
+	if (need_wait)
+		__wait_one_discard_bio(sbi, dc);
+}
+
+void stop_discard_thread(struct f2fs_sb_info *sbi)
+{
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+
+	if (dcc && dcc->f2fs_issue_discard) {
+		struct task_struct *discard_thread = dcc->f2fs_issue_discard;
+
+		dcc->f2fs_issue_discard = NULL;
+		kthread_stop(discard_thread);
 	}
 }
 
@@ -1060,18 +1114,24 @@ static int issue_discard_thread(void *data)
 	struct f2fs_sb_info *sbi = data;
 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 	wait_queue_head_t *q = &dcc->discard_wait_queue;
-repeat:
-	if (kthread_should_stop())
-		return 0;
 
-	__issue_discard_cmd(sbi, true);
-	__wait_discard_cmd(sbi, true);
+	set_freezable();
 
-	congestion_wait(BLK_RW_SYNC, HZ/50);
+	do {
+		wait_event_interruptible(*q, kthread_should_stop() ||
+					freezing(current) ||
+					atomic_read(&dcc->discard_cmd_cnt));
+		if (try_to_freeze())
+			continue;
+		if (kthread_should_stop())
+			return 0;
 
-	wait_event_interruptible(*q, kthread_should_stop() ||
-				atomic_read(&dcc->discard_cmd_cnt));
-	goto repeat;
+		__issue_discard_cmd(sbi, true);
+		__wait_discard_cmd(sbi, true);
+
+		congestion_wait(BLK_RW_SYNC, HZ/50);
+	} while (!kthread_should_stop());
+	return 0;
 }
 
 #ifdef CONFIG_BLK_DEV_ZONED
@@ -1322,7 +1382,8 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 					sbi->blocks_per_seg, cur_pos);
 			len = next_pos - cur_pos;
 
-			if (force && len < cpc->trim_minlen)
+			if (f2fs_sb_mounted_blkzoned(sbi->sb) ||
+			    (force && len < cpc->trim_minlen))
 				goto skip;
 
 			f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
@@ -1398,12 +1459,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
 	if (!dcc)
 		return;
 
-	if (dcc->f2fs_issue_discard) {
-		struct task_struct *discard_thread = dcc->f2fs_issue_discard;
-
-		dcc->f2fs_issue_discard = NULL;
-		kthread_stop(discard_thread);
-	}
+	stop_discard_thread(sbi);
 
 	kfree(dcc);
 	SM_I(sbi)->dcc_info = NULL;
@@ -2040,66 +2096,80 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
 	return false;
 }
 
-static int __get_segment_type_2(struct page *page, enum page_type p_type)
+static int __get_segment_type_2(struct f2fs_io_info *fio)
 {
-	if (p_type == DATA)
+	if (fio->type == DATA)
 		return CURSEG_HOT_DATA;
 	else
 		return CURSEG_HOT_NODE;
 }
 
-static int __get_segment_type_4(struct page *page, enum page_type p_type)
+static int __get_segment_type_4(struct f2fs_io_info *fio)
 {
-	if (p_type == DATA) {
-		struct inode *inode = page->mapping->host;
+	if (fio->type == DATA) {
+		struct inode *inode = fio->page->mapping->host;
 
 		if (S_ISDIR(inode->i_mode))
 			return CURSEG_HOT_DATA;
 		else
 			return CURSEG_COLD_DATA;
 	} else {
-		if (IS_DNODE(page) && is_cold_node(page))
+		if (IS_DNODE(fio->page) && is_cold_node(fio->page))
 			return CURSEG_WARM_NODE;
 		else
 			return CURSEG_COLD_NODE;
 	}
 }
 
-static int __get_segment_type_6(struct page *page, enum page_type p_type)
+static int __get_segment_type_6(struct f2fs_io_info *fio)
 {
-	if (p_type == DATA) {
-		struct inode *inode = page->mapping->host;
+	if (fio->type == DATA) {
+		struct inode *inode = fio->page->mapping->host;
 
-		if (is_cold_data(page) || file_is_cold(inode))
+		if (is_cold_data(fio->page) || file_is_cold(inode))
 			return CURSEG_COLD_DATA;
 		if (is_inode_flag_set(inode, FI_HOT_DATA))
 			return CURSEG_HOT_DATA;
 		return CURSEG_WARM_DATA;
 	} else {
-		if (IS_DNODE(page))
-			return is_cold_node(page) ? CURSEG_WARM_NODE :
+		if (IS_DNODE(fio->page))
+			return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
 						CURSEG_HOT_NODE;
 		return CURSEG_COLD_NODE;
 	}
 }
 
-static int __get_segment_type(struct page *page, enum page_type p_type)
+static int __get_segment_type(struct f2fs_io_info *fio)
 {
-	switch (F2FS_P_SB(page)->active_logs) {
+	int type = 0;
+
+	switch (fio->sbi->active_logs) {
 	case 2:
-		return __get_segment_type_2(page, p_type);
+		type = __get_segment_type_2(fio);
+		break;
 	case 4:
-		return __get_segment_type_4(page, p_type);
+		type = __get_segment_type_4(fio);
+		break;
+	case 6:
+		type = __get_segment_type_6(fio);
+		break;
+	default:
+		f2fs_bug_on(fio->sbi, true);
 	}
-	/* NR_CURSEG_TYPE(6) logs by default */
-	f2fs_bug_on(F2FS_P_SB(page),
-		F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
-	return __get_segment_type_6(page, p_type);
+
+	if (IS_HOT(type))
+		fio->temp = HOT;
+	else if (IS_WARM(type))
+		fio->temp = WARM;
+	else
+		fio->temp = COLD;
+	return type;
 }
 
 void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 		block_t old_blkaddr, block_t *new_blkaddr,
-		struct f2fs_summary *sum, int type)
+		struct f2fs_summary *sum, int type,
+		struct f2fs_io_info *fio, bool add_list)
 {
 	struct sit_info *sit_i = SIT_I(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2135,29 +2205,35 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 	if (page && IS_NODESEG(type))
 		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
 
+	if (add_list) {
+		struct f2fs_bio_info *io;
+
+		INIT_LIST_HEAD(&fio->list);
+		fio->in_list = true;
+		io = sbi->write_io[fio->type] + fio->temp;
+		spin_lock(&io->io_lock);
+		list_add_tail(&fio->list, &io->io_list);
+		spin_unlock(&io->io_lock);
+	}
+
 	mutex_unlock(&curseg->curseg_mutex);
 }
 
 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
 {
-	int type = __get_segment_type(fio->page, fio->type);
+	int type = __get_segment_type(fio);
 	int err;
 
-	if (fio->type == NODE || fio->type == DATA)
-		mutex_lock(&fio->sbi->wio_mutex[fio->type]);
 reallocate:
 	allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
-					&fio->new_blkaddr, sum, type);
+			&fio->new_blkaddr, sum, type, fio, true);
 
 	/* writeout dirty page into bdev */
-	err = f2fs_submit_page_mbio(fio);
+	err = f2fs_submit_page_write(fio);
 	if (err == -EAGAIN) {
 		fio->old_blkaddr = fio->new_blkaddr;
 		goto reallocate;
 	}
-
-	if (fio->type == NODE || fio->type == DATA)
-		mutex_unlock(&fio->sbi->wio_mutex[fio->type]);
 }
 
 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
@@ -2171,13 +2247,14 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
 		.new_blkaddr = page->index,
 		.page = page,
 		.encrypted_page = NULL,
+		.in_list = false,
 	};
 
 	if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
 		fio.op_flags &= ~REQ_META;
 
 	set_page_writeback(page);
-	f2fs_submit_page_mbio(&fio);
+	f2fs_submit_page_write(&fio);
 }
 
 void write_node_page(unsigned int nid, struct f2fs_io_info *fio)
@@ -2296,8 +2373,8 @@ void f2fs_wait_on_page_writeback(struct page *page,
 	if (PageWriteback(page)) {
 		struct f2fs_sb_info *sbi = F2FS_P_SB(page);
 
-		f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
-						0, page->index, type, WRITE);
+		f2fs_submit_merged_write_cond(sbi, page->mapping->host,
+						0, page->index, type);
 		if (ordered)
 			wait_on_page_writeback(page);
 		else
@@ -2455,6 +2532,8 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
 
 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
 {
+	struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
+	struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
 	int type = CURSEG_HOT_DATA;
 	int err;
 
@@ -2481,6 +2560,11 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
 			return err;
 	}
 
+	/* sanity check for summary blocks */
+	if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
+			sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
+		return -EINVAL;
+
 	return 0;
 }
 
@@ -3203,7 +3287,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
 
 	INIT_LIST_HEAD(&sm_info->sit_entry_set);
 
-	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
+	if (!f2fs_readonly(sbi->sb)) {
 		err = create_flush_cmd_control(sbi);
 		if (err)
 			return err;

diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 010f336..6b871b4 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h

@@ -27,6 +27,10 @@
 #define IS_DATASEG(t)	((t) <= CURSEG_COLD_DATA)
 #define IS_NODESEG(t)	((t) >= CURSEG_HOT_NODE)
 
+/* Temperature of a current-segment type, regardless of data vs. node. */
+#define IS_HOT(t)	((t) == CURSEG_HOT_DATA || (t) == CURSEG_HOT_NODE)
+#define IS_WARM(t)	((t) == CURSEG_WARM_DATA || (t) == CURSEG_WARM_NODE)
+#define IS_COLD(t)	((t) == CURSEG_COLD_DATA || (t) == CURSEG_COLD_NODE)
+
 #define IS_CURSEG(sbi, seg)						\
 	(((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) ||	\
 	 ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) ||	\

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 0b89b0b..32e4c02 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c

@@ -22,6 +22,7 @@
 #include <linux/random.h>
 #include <linux/exportfs.h>
 #include <linux/blkdev.h>
+#include <linux/quotaops.h>
 #include <linux/f2fs_fs.h>
 #include <linux/sysfs.h>
 
@@ -35,9 +36,7 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/f2fs.h>
 
-static struct proc_dir_entry *f2fs_proc_root;
 static struct kmem_cache *f2fs_inode_cachep;
-static struct kset *f2fs_kset;
 
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 
@@ -108,6 +107,8 @@ enum {
 	Opt_fault_injection,
 	Opt_lazytime,
 	Opt_nolazytime,
+	Opt_usrquota,
+	Opt_grpquota,
 	Opt_err,
 };
 
@@ -143,212 +144,11 @@ static match_table_t f2fs_tokens = {
 	{Opt_fault_injection, "fault_injection=%u"},
 	{Opt_lazytime, "lazytime"},
 	{Opt_nolazytime, "nolazytime"},
+	{Opt_usrquota, "usrquota"},
+	{Opt_grpquota, "grpquota"},
 	{Opt_err, NULL},
 };
 
-/* Sysfs support for f2fs */
-enum {
-	GC_THREAD,	/* struct f2fs_gc_thread */
-	SM_INFO,	/* struct f2fs_sm_info */
-	DCC_INFO,	/* struct discard_cmd_control */
-	NM_INFO,	/* struct f2fs_nm_info */
-	F2FS_SBI,	/* struct f2fs_sb_info */
-#ifdef CONFIG_F2FS_FAULT_INJECTION
-	FAULT_INFO_RATE,	/* struct f2fs_fault_info */
-	FAULT_INFO_TYPE,	/* struct f2fs_fault_info */
-#endif
-};
-
-struct f2fs_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
-	ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
-			 const char *, size_t);
-	int struct_type;
-	int offset;
-};
-
-static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
-{
-	if (struct_type == GC_THREAD)
-		return (unsigned char *)sbi->gc_thread;
-	else if (struct_type == SM_INFO)
-		return (unsigned char *)SM_I(sbi);
-	else if (struct_type == DCC_INFO)
-		return (unsigned char *)SM_I(sbi)->dcc_info;
-	else if (struct_type == NM_INFO)
-		return (unsigned char *)NM_I(sbi);
-	else if (struct_type == F2FS_SBI)
-		return (unsigned char *)sbi;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
-	else if (struct_type == FAULT_INFO_RATE ||
-					struct_type == FAULT_INFO_TYPE)
-		return (unsigned char *)&sbi->fault_info;
-#endif
-	return NULL;
-}
-
-static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
-		struct f2fs_sb_info *sbi, char *buf)
-{
-	struct super_block *sb = sbi->sb;
-
-	if (!sb->s_bdev->bd_part)
-		return snprintf(buf, PAGE_SIZE, "0\n");
-
-	return snprintf(buf, PAGE_SIZE, "%llu\n",
-		(unsigned long long)(sbi->kbytes_written +
-			BD_PART_WRITTEN(sbi)));
-}
-
-static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
-			struct f2fs_sb_info *sbi, char *buf)
-{
-	unsigned char *ptr = NULL;
-	unsigned int *ui;
-
-	ptr = __struct_ptr(sbi, a->struct_type);
-	if (!ptr)
-		return -EINVAL;
-
-	ui = (unsigned int *)(ptr + a->offset);
-
-	return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
-}
-
-static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
-			struct f2fs_sb_info *sbi,
-			const char *buf, size_t count)
-{
-	unsigned char *ptr;
-	unsigned long t;
-	unsigned int *ui;
-	ssize_t ret;
-
-	ptr = __struct_ptr(sbi, a->struct_type);
-	if (!ptr)
-		return -EINVAL;
-
-	ui = (unsigned int *)(ptr + a->offset);
-
-	ret = kstrtoul(skip_spaces(buf), 0, &t);
-	if (ret < 0)
-		return ret;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
-	if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
-		return -EINVAL;
-#endif
-	*ui = t;
-	return count;
-}
-
-static ssize_t f2fs_attr_show(struct kobject *kobj,
-				struct attribute *attr, char *buf)
-{
-	struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
-								s_kobj);
-	struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
-
-	return a->show ? a->show(a, sbi, buf) : 0;
-}
-
-static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr,
-						const char *buf, size_t len)
-{
-	struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
-									s_kobj);
-	struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
-
-	return a->store ? a->store(a, sbi, buf, len) : 0;
-}
-
-static void f2fs_sb_release(struct kobject *kobj)
-{
-	struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
-								s_kobj);
-	complete(&sbi->s_kobj_unregister);
-}
-
-#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
-static struct f2fs_attr f2fs_attr_##_name = {			\
-	.attr = {.name = __stringify(_name), .mode = _mode },	\
-	.show	= _show,					\
-	.store	= _store,					\
-	.struct_type = _struct_type,				\
-	.offset = _offset					\
-}
-
-#define F2FS_RW_ATTR(struct_type, struct_name, name, elname)	\
-	F2FS_ATTR_OFFSET(struct_type, name, 0644,		\
-		f2fs_sbi_show, f2fs_sbi_store,			\
-		offsetof(struct struct_name, elname))
-
-#define F2FS_GENERAL_RO_ATTR(name) \
-static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL)
-
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
-F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
-F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
-F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
-F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
-F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
-F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
-F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
-#endif
-F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
-
-#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
-static struct attribute *f2fs_attrs[] = {
-	ATTR_LIST(gc_min_sleep_time),
-	ATTR_LIST(gc_max_sleep_time),
-	ATTR_LIST(gc_no_gc_sleep_time),
-	ATTR_LIST(gc_idle),
-	ATTR_LIST(reclaim_segments),
-	ATTR_LIST(max_small_discards),
-	ATTR_LIST(batched_trim_sections),
-	ATTR_LIST(ipu_policy),
-	ATTR_LIST(min_ipu_util),
-	ATTR_LIST(min_fsync_blocks),
-	ATTR_LIST(min_hot_blocks),
-	ATTR_LIST(max_victim_search),
-	ATTR_LIST(dir_level),
-	ATTR_LIST(ram_thresh),
-	ATTR_LIST(ra_nid_pages),
-	ATTR_LIST(dirty_nats_ratio),
-	ATTR_LIST(cp_interval),
-	ATTR_LIST(idle_interval),
-#ifdef CONFIG_F2FS_FAULT_INJECTION
-	ATTR_LIST(inject_rate),
-	ATTR_LIST(inject_type),
-#endif
-	ATTR_LIST(lifetime_write_kbytes),
-	NULL,
-};
-
-static const struct sysfs_ops f2fs_attr_ops = {
-	.show	= f2fs_attr_show,
-	.store	= f2fs_attr_store,
-};
-
-static struct kobj_type f2fs_ktype = {
-	.default_attrs	= f2fs_attrs,
-	.sysfs_ops	= &f2fs_attr_ops,
-	.release	= f2fs_sb_release,
-};
-
 void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
 {
 	struct va_format vaf;
@@ -585,6 +385,20 @@ static int parse_options(struct super_block *sb, char *options)
 		case Opt_nolazytime:
 			sb->s_flags &= ~MS_LAZYTIME;
 			break;
+#ifdef CONFIG_QUOTA
+		case Opt_usrquota:
+			set_opt(sbi, USRQUOTA);
+			break;
+		case Opt_grpquota:
+			set_opt(sbi, GRPQUOTA);
+			break;
+#else
+		case Opt_usrquota:
+		case Opt_grpquota:
+			f2fs_msg(sb, KERN_INFO,
+					"quota operations not supported");
+			break;
+#endif
 		default:
 			f2fs_msg(sb, KERN_ERR,
 				"Unrecognized mount option \"%s\" or missing value",
@@ -624,7 +438,12 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
 	mutex_init(&fi->inmem_lock);
 	init_rwsem(&fi->dio_rwsem[READ]);
 	init_rwsem(&fi->dio_rwsem[WRITE]);
+	init_rwsem(&fi->i_mmap_sem);
 
+#ifdef CONFIG_QUOTA
+	memset(&fi->i_dquot, 0, sizeof(fi->i_dquot));
+	fi->i_reserved_quota = 0;
+#endif
 	/* Will be used by directory only */
 	fi->i_dir_level = F2FS_SB(sb)->dir_level;
 	return &fi->vfs_inode;
@@ -765,18 +584,13 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
 	kfree(sbi->devs);
 }
 
+static void f2fs_quota_off_umount(struct super_block *sb);
 static void f2fs_put_super(struct super_block *sb)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	int i;
 
-	if (sbi->s_proc) {
-		remove_proc_entry("segment_info", sbi->s_proc);
-		remove_proc_entry("segment_bits", sbi->s_proc);
-		remove_proc_entry(sb->s_id, f2fs_proc_root);
-	}
-	kobject_del(&sbi->s_kobj);
-
-	stop_gc_thread(sbi);
+	f2fs_quota_off_umount(sb);
 
 	/* prevent remaining shrinker jobs */
 	mutex_lock(&sbi->umount_mutex);
@@ -797,7 +611,7 @@ static void f2fs_put_super(struct super_block *sb)
 	/* be sure to wait for any on-going discard commands */
 	f2fs_wait_discard_bios(sbi);
 
-	if (!sbi->discard_blks) {
+	if (f2fs_discard_en(sbi) && !sbi->discard_blks) {
 		struct cp_control cpc = {
 			.reason = CP_UMOUNT | CP_TRIMMED,
 		};
@@ -817,7 +631,7 @@ static void f2fs_put_super(struct super_block *sb)
 	mutex_unlock(&sbi->umount_mutex);
 
 	/* our cp_error case, we can wait for any writeback page */
-	f2fs_flush_merged_bios(sbi);
+	f2fs_flush_merged_writes(sbi);
 
 	iput(sbi->node_inode);
 	iput(sbi->meta_inode);
@@ -827,8 +641,8 @@ static void f2fs_put_super(struct super_block *sb)
 	destroy_segment_manager(sbi);
 
 	kfree(sbi->ckpt);
-	kobject_put(&sbi->s_kobj);
-	wait_for_completion(&sbi->s_kobj_unregister);
+
+	f2fs_exit_sysfs(sbi);
 
 	sb->s_fs_info = NULL;
 	if (sbi->s_chksum_driver)
@@ -838,6 +652,8 @@ static void f2fs_put_super(struct super_block *sb)
 	destroy_device_list(sbi);
 	mempool_destroy(sbi->write_io_dummy);
 	destroy_percpu_info(sbi);
+	for (i = 0; i < NR_PAGE_TYPE; i++)
+		kfree(sbi->write_io[i]);
 	kfree(sbi);
 }
 
@@ -888,6 +704,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
 	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
 	block_t total_count, user_block_count, start_count, ovp_count;
+	u64 avail_node_count;
 
 	total_count = le64_to_cpu(sbi->raw_super->block_count);
 	user_block_count = sbi->user_block_count;
@@ -898,11 +715,19 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 	buf->f_blocks = total_count - start_count;
 	buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count;
-	buf->f_bavail = user_block_count - valid_user_blocks(sbi);
+	buf->f_bavail = user_block_count - valid_user_blocks(sbi) -
+						sbi->reserved_blocks;
 
-	buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
-	buf->f_ffree = min(buf->f_files - valid_node_count(sbi),
-							buf->f_bavail);
+	avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
+
+	if (avail_node_count > user_block_count) {
+		buf->f_files = user_block_count;
+		buf->f_ffree = buf->f_bavail;
+	} else {
+		buf->f_files = avail_node_count;
+		buf->f_ffree = min(avail_node_count - valid_node_count(sbi),
+					buf->f_bavail);
+	}
 
 	buf->f_namelen = F2FS_NAME_LEN;
 	buf->f_fsid.val[0] = (u32)id;
@@ -980,79 +805,19 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 		seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 	if (test_opt(sbi, FAULT_INJECTION))
-		seq_puts(seq, ",fault_injection");
+		seq_printf(seq, ",fault_injection=%u",
+				sbi->fault_info.inject_rate);
+#endif
+#ifdef CONFIG_QUOTA
+	if (test_opt(sbi, USRQUOTA))
+		seq_puts(seq, ",usrquota");
+	if (test_opt(sbi, GRPQUOTA))
+		seq_puts(seq, ",grpquota");
 #endif
 
 	return 0;
 }
 
-static int segment_info_seq_show(struct seq_file *seq, void *offset)
-{
-	struct super_block *sb = seq->private;
-	struct f2fs_sb_info *sbi = F2FS_SB(sb);
-	unsigned int total_segs =
-			le32_to_cpu(sbi->raw_super->segment_count_main);
-	int i;
-
-	seq_puts(seq, "format: segment_type|valid_blocks\n"
-		"segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
-
-	for (i = 0; i < total_segs; i++) {
-		struct seg_entry *se = get_seg_entry(sbi, i);
-
-		if ((i % 10) == 0)
-			seq_printf(seq, "%-10d", i);
-		seq_printf(seq, "%d|%-3u", se->type,
-					get_valid_blocks(sbi, i, false));
-		if ((i % 10) == 9 || i == (total_segs - 1))
-			seq_putc(seq, '\n');
-		else
-			seq_putc(seq, ' ');
-	}
-
-	return 0;
-}
-
-static int segment_bits_seq_show(struct seq_file *seq, void *offset)
-{
-	struct super_block *sb = seq->private;
-	struct f2fs_sb_info *sbi = F2FS_SB(sb);
-	unsigned int total_segs =
-			le32_to_cpu(sbi->raw_super->segment_count_main);
-	int i, j;
-
-	seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n"
-		"segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
-
-	for (i = 0; i < total_segs; i++) {
-		struct seg_entry *se = get_seg_entry(sbi, i);
-
-		seq_printf(seq, "%-10d", i);
-		seq_printf(seq, "%d|%-3u|", se->type,
-					get_valid_blocks(sbi, i, false));
-		for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
-			seq_printf(seq, " %.2x", se->cur_valid_map[j]);
-		seq_putc(seq, '\n');
-	}
-	return 0;
-}
-
-#define F2FS_PROC_FILE_DEF(_name)					\
-static int _name##_open_fs(struct inode *inode, struct file *file)	\
-{									\
-	return single_open(file, _name##_seq_show, PDE_DATA(inode));	\
-}									\
-									\
-static const struct file_operations f2fs_seq_##_name##_fops = {		\
-	.open = _name##_open_fs,					\
-	.read = seq_read,						\
-	.llseek = seq_lseek,						\
-	.release = single_release,					\
-};
-
-F2FS_PROC_FILE_DEF(segment_info);
-F2FS_PROC_FILE_DEF(segment_bits);
-
 static void default_options(struct f2fs_sb_info *sbi)
 {
 	/* init some FS parameters */
@@ -1089,6 +854,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
 	struct f2fs_mount_info org_mount_opt;
+	unsigned long old_sb_flags;
 	int err, active_logs;
 	bool need_restart_gc = false;
 	bool need_stop_gc = false;
@@ -1102,6 +868,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	 * need to restore them.
 	 */
 	org_mount_opt = sbi->mount_opt;
+	old_sb_flags = sb->s_flags;
 	active_logs = sbi->active_logs;
 
 	/* recover superblocks we couldn't write due to previous RO mount */
@@ -1113,7 +880,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 			clear_sbi_flag(sbi, SBI_NEED_SB_WRITE);
 	}
 
-	sbi->mount_opt.opt = 0;
 	default_options(sbi);
 
 	/* parse mount options */
@@ -1128,6 +894,16 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
 		goto skip;
 
+	if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) {
+		err = dquot_suspend(sb, -1);
+		if (err < 0)
+			goto restore_opts;
+	} else {
+		/* dquot_resume needs RW */
+		sb->s_flags &= ~MS_RDONLY;
+		dquot_resume(sb, -1);
+	}
+
 	/* disallow enable/disable extent_cache dynamically */
 	if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
 		err = -EINVAL;
@@ -1192,12 +968,237 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 restore_opts:
 	sbi->mount_opt = org_mount_opt;
 	sbi->active_logs = active_logs;
+	sb->s_flags = old_sb_flags;
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 	sbi->fault_info = ffi;
 #endif
 	return err;
 }
 
+#ifdef CONFIG_QUOTA
+/*
+ * Read data from quotafile.
+ *
+ * Copies at most @len bytes, starting at byte offset @off of the quota file
+ * registered for @type, into @data.  The request is clamped to i_size and the
+ * data is fetched one block at a time through the quota inode's page cache.
+ *
+ * Returns the number of bytes copied, 0 if @off lies beyond i_size, or a
+ * negative errno if a page could not be read or is not up to date.
+ */
+static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
+			       size_t len, loff_t off)
+{
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	struct address_space *mapping = inode->i_mapping;
+	block_t blkidx = F2FS_BYTES_TO_BLK(off);
+	int offset = off & (sb->s_blocksize - 1);	/* offset inside the first block */
+	int tocopy;
+	size_t toread;
+	loff_t i_size = i_size_read(inode);
+	struct page *page;
+	char *kaddr;
+
+	if (off > i_size)
+		return 0;
+
+	/* never read past end of file */
+	if (off + len > i_size)
+		len = i_size - off;
+	toread = len;
+	while (toread > 0) {
+		tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
+repeat:
+		page = read_mapping_page(mapping, blkidx, NULL);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		lock_page(page);
+
+		/* the page no longer belongs to this mapping: drop it and retry */
+		if (unlikely(page->mapping != mapping)) {
+			f2fs_put_page(page, 1);
+			goto repeat;
+		}
+		if (unlikely(!PageUptodate(page))) {
+			f2fs_put_page(page, 1);
+			return -EIO;
+		}
+
+		kaddr = kmap_atomic(page);
+		memcpy(data, kaddr + offset, tocopy);
+		kunmap_atomic(kaddr);
+		f2fs_put_page(page, 1);
+
+		/* only the first block can start at a non-zero offset */
+		offset = 0;
+		toread -= tocopy;
+		data += tocopy;
+		blkidx++;
+	}
+	return len;
+}
+
+/*
+ * Write to quotafile.
+ *
+ * Copies @len bytes from @data into the quota file registered for @type at
+ * byte offset @off, one block-sized piece at a time, using the mapping's
+ * ->write_begin()/->write_end() operations.
+ *
+ * Returns the number of bytes written.  If nothing was written, the error
+ * from ->write_begin() is returned instead (0 when @len is 0).  After any
+ * progress the inode's i_version and timestamps are updated and the inode
+ * is marked dirty.
+ */
+static ssize_t f2fs_quota_write(struct super_block *sb, int type,
+				const char *data, size_t len, loff_t off)
+{
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	struct address_space *mapping = inode->i_mapping;
+	const struct address_space_operations *a_ops = mapping->a_ops;
+	int offset = off & (sb->s_blocksize - 1);	/* offset inside the first block */
+	size_t towrite = len;
+	struct page *page;
+	char *kaddr;
+	int err = 0;
+	int tocopy;
+
+	while (towrite > 0) {
+		tocopy = min_t(unsigned long, sb->s_blocksize - offset,
+								towrite);
+
+		err = a_ops->write_begin(NULL, mapping, off, tocopy, 0,
+							&page, NULL);
+		if (unlikely(err))
+			break;
+
+		kaddr = kmap_atomic(page);
+		memcpy(kaddr + offset, data, tocopy);
+		kunmap_atomic(kaddr);
+		flush_dcache_page(page);
+
+		/* NOTE(review): the return value of ->write_end() is not checked */
+		a_ops->write_end(NULL, mapping, off, tocopy, tocopy,
+						page, NULL);
+		offset = 0;
+		towrite -= tocopy;
+		off += tocopy;
+		data += tocopy;
+		cond_resched();
+	}
+
+	/* no progress at all: report the error (or 0 for an empty request) */
+	if (len == towrite)
+		return err;
+	inode->i_version++;
+	inode->i_mtime = inode->i_ctime = current_time(inode);
+	f2fs_mark_inode_dirty_sync(inode, false);
+	return len - towrite;
+}
+
+/* ->get_dquots: return the dquot pointer array kept in the f2fs inode. */
+static struct dquot **f2fs_get_dquots(struct inode *inode)
+{
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+
+	return fi->i_dquot;
+}
+
+/* ->get_reserved_space: return the address of the inode's reserved-quota counter. */
+static qsize_t *f2fs_get_reserved_space(struct inode *inode)
+{
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+
+	return &fi->i_reserved_quota;
+}
+
+/*
+ * Write back the dirty dquots of @type (-1 selects every quota type), then,
+ * for each selected type that is active, flush the quota file's dirty pages
+ * and drop its page cache.
+ *
+ * Returns 0 on success or the first error reported by
+ * dquot_writeback_dquots() or filemap_write_and_wait().
+ */
+static int f2fs_quota_sync(struct super_block *sb, int type)
+{
+	struct quota_info *dqopt = sb_dqopt(sb);
+	int cnt;
+	int ret;
+
+	ret = dquot_writeback_dquots(sb, type);
+	if (ret)
+		return ret;
+
+	/*
+	 * Now when everything is written we can discard the pagecache so
+	 * that userspace sees the changes.
+	 */
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		/* skip types the caller did not ask for */
+		if (type != -1 && cnt != type)
+			continue;
+		if (!sb_has_quota_active(sb, cnt))
+			continue;
+
+		ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
+		if (ret)
+			return ret;
+
+		inode_lock(dqopt->files[cnt]);
+		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
+		inode_unlock(dqopt->files[cnt]);
+	}
+	return 0;
+}
+
+/*
+ * ->quota_on: sync all quota types, enable quota of @type via
+ * dquot_quota_on(), then flag the quota file at @path as noatime and
+ * immutable (both in the f2fs inode flags and in the VFS inode flags) and
+ * mark the inode dirty.
+ *
+ * Returns 0 on success or the error from f2fs_quota_sync() or
+ * dquot_quota_on().
+ */
+static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
+							const struct path *path)
+{
+	struct inode *inode;
+	int err;
+
+	err = f2fs_quota_sync(sb, -1);
+	if (err)
+		return err;
+
+	err = dquot_quota_on(sb, type, format_id, path);
+	if (err)
+		return err;
+
+	inode = d_inode(path->dentry);
+
+	inode_lock(inode);
+	F2FS_I(inode)->i_flags |= FS_NOATIME_FL | FS_IMMUTABLE_FL;
+	inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
+					S_NOATIME | S_IMMUTABLE);
+	inode_unlock(inode);
+	f2fs_mark_inode_dirty_sync(inode, false);
+
+	return 0;
+}
+
+/*
+ * ->quota_off: disable quota of @type.
+ *
+ * If there is no quota file for @type, or a reference on it cannot be
+ * taken, this simply defers to dquot_quota_off().  Otherwise all quota types
+ * are synced first, quota is turned off, and the noatime/immutable flags set
+ * by f2fs_quota_on() are cleared from the quota file before the reference is
+ * dropped.
+ *
+ * Returns the result of dquot_quota_off().
+ */
+static int f2fs_quota_off(struct super_block *sb, int type)
+{
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	int err;
+
+	if (!inode || !igrab(inode))
+		return dquot_quota_off(sb, type);
+
+	/* NOTE(review): the sync result is ignored, so quota-off proceeds regardless */
+	f2fs_quota_sync(sb, -1);
+
+	err = dquot_quota_off(sb, type);
+	if (err)
+		goto out_put;
+
+	inode_lock(inode);
+	F2FS_I(inode)->i_flags &= ~(FS_NOATIME_FL | FS_IMMUTABLE_FL);
+	inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
+	inode_unlock(inode);
+	f2fs_mark_inode_dirty_sync(inode, false);
+out_put:
+	iput(inode);	/* pairs with igrab() above */
+	return err;
+}
+
+/* Turn off every quota type on @sb; individual failures are not reported. */
+static void f2fs_quota_off_umount(struct super_block *sb)
+{
+	int type = 0;
+
+	while (type < MAXQUOTAS) {
+		f2fs_quota_off(sb, type);
+		type++;
+	}
+}
+
+/*
+ * dquot operations for f2fs: only ->get_reserved_space is f2fs specific,
+ * every other hook is the generic dquot_* implementation.
+ */
+static const struct dquot_operations f2fs_quota_operations = {
+	.get_reserved_space = f2fs_get_reserved_space,
+	.write_dquot	= dquot_commit,
+	.acquire_dquot	= dquot_acquire,
+	.release_dquot	= dquot_release,
+	.mark_dirty	= dquot_mark_dquot_dirty,
+	.write_info	= dquot_commit_info,
+	.alloc_dquot	= dquot_alloc,
+	.destroy_dquot	= dquot_destroy,
+	.get_next_id	= dquot_get_next_id,
+};
+
+/*
+ * quotactl operations for f2fs: on/off/sync go through the f2fs wrappers
+ * above, the remaining calls use the generic dquot_* helpers.
+ */
+static const struct quotactl_ops f2fs_quotactl_ops = {
+	.quota_on	= f2fs_quota_on,
+	.quota_off	= f2fs_quota_off,
+	.quota_sync	= f2fs_quota_sync,
+	.get_state	= dquot_get_state,
+	.set_info	= dquot_set_dqinfo,
+	.get_dqblk	= dquot_get_dqblk,
+	.set_dqblk	= dquot_set_dqblk,
+	.get_nextdqblk	= dquot_get_next_dqblk,
+};
+#else
+/* Without CONFIG_QUOTA there is nothing to turn off at umount time. */
+static inline void f2fs_quota_off_umount(struct super_block *sb)
+{
+}
+#endif
+
 static struct super_operations f2fs_sops = {
 	.alloc_inode	= f2fs_alloc_inode,
 	.drop_inode	= f2fs_drop_inode,
@@ -1205,6 +1206,11 @@ static struct super_operations f2fs_sops = {
 	.write_inode	= f2fs_write_inode,
 	.dirty_inode	= f2fs_dirty_inode,
 	.show_options	= f2fs_show_options,
+#ifdef CONFIG_QUOTA
+	.quota_read	= f2fs_quota_read,
+	.quota_write	= f2fs_quota_write,
+	.get_dquots	= f2fs_get_dquots,
+#endif
 	.evict_inode	= f2fs_evict_inode,
 	.put_super	= f2fs_put_super,
 	.sync_fs	= f2fs_sync_fs,
@@ -1521,6 +1527,8 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 	unsigned int ovp_segments, reserved_segments;
+	unsigned int main_segs, blocks_per_seg;
+	int i;
 
 	total = le32_to_cpu(raw_super->segment_count);
 	fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
@@ -1542,6 +1550,20 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
 		return 1;
 	}
 
+	main_segs = le32_to_cpu(raw_super->segment_count_main);
+	blocks_per_seg = sbi->blocks_per_seg;
+
+	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
+		if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
+			le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg)
+			return 1;
+	}
+	for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
+		if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs ||
+			le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg)
+			return 1;
+	}
+
 	if (unlikely(f2fs_cp_error(sbi))) {
 		f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
 		return 1;
@@ -1552,7 +1574,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
 static void init_sb_info(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_super_block *raw_super = sbi->raw_super;
-	int i;
+	int i, j;
 
 	sbi->log_sectors_per_block =
 		le32_to_cpu(raw_super->log_sectors_per_block);
@@ -1584,8 +1606,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
 
 	INIT_LIST_HEAD(&sbi->s_list);
 	mutex_init(&sbi->umount_mutex);
-	mutex_init(&sbi->wio_mutex[NODE]);
-	mutex_init(&sbi->wio_mutex[DATA]);
+	for (i = 0; i < NR_PAGE_TYPE - 1; i++)
+		for (j = HOT; j < NR_TEMP_TYPE; j++)
+			mutex_init(&sbi->wio_mutex[i][j]);
 	spin_lock_init(&sbi->cp_lock);
 }
 
@@ -1908,6 +1931,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	if (f2fs_sb_mounted_blkzoned(sb)) {
 		f2fs_msg(sb, KERN_ERR,
 			 "Zoned block device support is not enabled\n");
+		err = -EOPNOTSUPP;
 		goto free_sb_buf;
 	}
 #endif
@@ -1929,6 +1953,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_max_links = F2FS_LINK_MAX;
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 
+#ifdef CONFIG_QUOTA
+	sb->dq_op = &f2fs_quota_operations;
+	sb->s_qcop = &f2fs_quotactl_ops;
+	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+#endif
+
 	sb->s_op = &f2fs_sops;
 	sb->s_cop = &f2fs_cryptops;
 	sb->s_xattr = f2fs_xattr_handlers;
@@ -1950,13 +1980,24 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	set_sbi_flag(sbi, SBI_POR_DOING);
 	spin_lock_init(&sbi->stat_lock);
 
-	init_rwsem(&sbi->read_io.io_rwsem);
-	sbi->read_io.sbi = sbi;
-	sbi->read_io.bio = NULL;
 	for (i = 0; i < NR_PAGE_TYPE; i++) {
-		init_rwsem(&sbi->write_io[i].io_rwsem);
-		sbi->write_io[i].sbi = sbi;
-		sbi->write_io[i].bio = NULL;
+		int n = (i == META) ? 1: NR_TEMP_TYPE;
+		int j;
+
+		sbi->write_io[i] = kmalloc(n * sizeof(struct f2fs_bio_info),
+								GFP_KERNEL);
+		if (!sbi->write_io[i]) {
+			err = -ENOMEM;
+			goto free_options;
+		}
+
+		for (j = HOT; j < n; j++) {
+			init_rwsem(&sbi->write_io[i][j].io_rwsem);
+			sbi->write_io[i][j].sbi = sbi;
+			sbi->write_io[i][j].bio = NULL;
+			spin_lock_init(&sbi->write_io[i][j].io_lock);
+			INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
+		}
 	}
 
 	init_rwsem(&sbi->cp_rwsem);
@@ -1970,8 +2011,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	if (F2FS_IO_SIZE(sbi) > 1) {
 		sbi->write_io_dummy =
 			mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
-		if (!sbi->write_io_dummy)
+		if (!sbi->write_io_dummy) {
+			err = -ENOMEM;
 			goto free_options;
+		}
 	}
 
 	/* get an inode for meta space */
@@ -2003,6 +2046,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->total_valid_block_count =
 				le64_to_cpu(sbi->ckpt->valid_block_count);
 	sbi->last_valid_block_count = sbi->total_valid_block_count;
+	sbi->reserved_blocks = 0;
 
 	for (i = 0; i < NR_INODE_TYPE; i++) {
 		INIT_LIST_HEAD(&sbi->inode_list[i]);
@@ -2078,22 +2122,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 		goto free_root_inode;
 	}
 
-	if (f2fs_proc_root)
-		sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
-
-	if (sbi->s_proc) {
-		proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
-				 &f2fs_seq_segment_info_fops, sb);
-		proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
-				 &f2fs_seq_segment_bits_fops, sb);
-	}
-
-	sbi->s_kobj.kset = f2fs_kset;
-	init_completion(&sbi->s_kobj_unregister);
-	err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
-							"%s", sb->s_id);
+	err = f2fs_init_sysfs(sbi);
 	if (err)
-		goto free_proc;
+		goto free_root_inode;
 
 	/* recover fsynced data */
 	if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
@@ -2104,7 +2135,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 		if (bdev_read_only(sb->s_bdev) &&
 				!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
 			err = -EROFS;
-			goto free_kobj;
+			goto free_sysfs;
 		}
 
 		if (need_fsck)
@@ -2118,7 +2149,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 			need_fsck = true;
 			f2fs_msg(sb, KERN_ERR,
 				"Cannot recover all fsync data errno=%d", err);
-			goto free_kobj;
+			goto free_sysfs;
 		}
 	} else {
 		err = recover_fsync_data(sbi, true);
@@ -2127,7 +2158,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 			err = -EINVAL;
 			f2fs_msg(sb, KERN_ERR,
 				"Need to recover fsync data");
-			goto free_kobj;
+			goto free_sysfs;
 		}
 	}
 skip_recovery:
@@ -2142,7 +2173,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 		/* After POR, we can run background GC thread.*/
 		err = start_gc_thread(sbi);
 		if (err)
-			goto free_kobj;
+			goto free_sysfs;
 	}
 	kfree(options);
 
@@ -2160,17 +2191,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	f2fs_update_time(sbi, REQ_TIME);
 	return 0;
 
-free_kobj:
+free_sysfs:
 	f2fs_sync_inode_meta(sbi);
-	kobject_del(&sbi->s_kobj);
-	kobject_put(&sbi->s_kobj);
-	wait_for_completion(&sbi->s_kobj_unregister);
-free_proc:
-	if (sbi->s_proc) {
-		remove_proc_entry("segment_info", sbi->s_proc);
-		remove_proc_entry("segment_bits", sbi->s_proc);
-		remove_proc_entry(sb->s_id, f2fs_proc_root);
-	}
+	f2fs_exit_sysfs(sbi);
 free_root_inode:
 	dput(sb->s_root);
 	sb->s_root = NULL;
@@ -2202,6 +2225,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 free_io_dummy:
 	mempool_destroy(sbi->write_io_dummy);
 free_options:
+	for (i = 0; i < NR_PAGE_TYPE; i++)
+		kfree(sbi->write_io[i]);
 	destroy_percpu_info(sbi);
 	kfree(options);
 free_sb_buf:
@@ -2228,8 +2253,11 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
 
 static void kill_f2fs_super(struct super_block *sb)
 {
-	if (sb->s_root)
+	if (sb->s_root) {
 		set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
+		stop_gc_thread(F2FS_SB(sb));
+		stop_discard_thread(F2FS_SB(sb));
+	}
 	kill_block_super(sb);
 }
 
@@ -2283,30 +2311,26 @@ static int __init init_f2fs_fs(void)
 	err = create_extent_cache();
 	if (err)
 		goto free_checkpoint_caches;
-	f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
-	if (!f2fs_kset) {
-		err = -ENOMEM;
+	err = f2fs_register_sysfs();
+	if (err)
 		goto free_extent_cache;
-	}
 	err = register_shrinker(&f2fs_shrinker_info);
 	if (err)
-		goto free_kset;
-
+		goto free_sysfs;
 	err = register_filesystem(&f2fs_fs_type);
 	if (err)
 		goto free_shrinker;
 	err = f2fs_create_root_stats();
 	if (err)
 		goto free_filesystem;
-	f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
 	return 0;
 
 free_filesystem:
 	unregister_filesystem(&f2fs_fs_type);
 free_shrinker:
 	unregister_shrinker(&f2fs_shrinker_info);
-free_kset:
-	kset_unregister(f2fs_kset);
+free_sysfs:
+	f2fs_unregister_sysfs();
 free_extent_cache:
 	destroy_extent_cache();
 free_checkpoint_caches:
@@ -2323,11 +2347,10 @@ static int __init init_f2fs_fs(void)
 
 static void __exit exit_f2fs_fs(void)
 {
-	remove_proc_entry("fs/f2fs", NULL);
 	f2fs_destroy_root_stats();
 	unregister_filesystem(&f2fs_fs_type);
 	unregister_shrinker(&f2fs_shrinker_info);
-	kset_unregister(f2fs_kset);
+	f2fs_unregister_sysfs();
 	destroy_extent_cache();
 	destroy_checkpoint_caches();
 	destroy_segment_manager_caches();

diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
new file mode 100644
index 0000000..9adc202
--- /dev/null
+++ b/fs/f2fs/sysfs.c

@@ -0,0 +1,364 @@
+/*
+ * f2fs sysfs interface
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ * Copyright (c) 2017 Chao Yu <chao@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/proc_fs.h>
+#include <linux/f2fs_fs.h>
+
+#include "f2fs.h"
+#include "segment.h"
+#include "gc.h"
+
+static struct proc_dir_entry *f2fs_proc_root;
+static struct kset *f2fs_kset;
+
+/*
+ * Sysfs support for f2fs.
+ *
+ * Each attribute carries one of these tags; __struct_ptr() maps the tag to
+ * the structure that the attribute's byte offset is relative to.
+ */
+enum {
+	GC_THREAD,	/* struct f2fs_gc_thread */
+	SM_INFO,	/* struct f2fs_sm_info */
+	DCC_INFO,	/* struct discard_cmd_control */
+	NM_INFO,	/* struct f2fs_nm_info */
+	F2FS_SBI,	/* struct f2fs_sb_info */
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+	FAULT_INFO_RATE,	/* struct f2fs_fault_info */
+	FAULT_INFO_TYPE,	/* struct f2fs_fault_info */
+#endif
+	RESERVED_BLOCKS,	/* struct f2fs_sb_info, store is range checked */
+};
+
+/* One sysfs attribute of a mounted f2fs instance. */
+struct f2fs_attr {
+	struct attribute attr;	/* generic sysfs attribute (name and mode) */
+	/* format the value into the sysfs buffer */
+	ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
+	/* parse and apply a value written by userspace */
+	ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
+			 const char *, size_t);
+	int struct_type;	/* tag resolved by __struct_ptr() */
+	int offset;		/* byte offset of the field inside that structure */
+};
+
+/*
+ * Map a struct_type tag to the base address of the matching structure of
+ * @sbi.  Returns NULL for an unknown tag; the returned pointer itself may
+ * also be NULL when the selected structure pointer is NULL.
+ */
+static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
+{
+	switch (struct_type) {
+	case GC_THREAD:
+		return (unsigned char *)sbi->gc_thread;
+	case SM_INFO:
+		return (unsigned char *)SM_I(sbi);
+	case DCC_INFO:
+		return (unsigned char *)SM_I(sbi)->dcc_info;
+	case NM_INFO:
+		return (unsigned char *)NM_I(sbi);
+	case F2FS_SBI:
+	case RESERVED_BLOCKS:
+		return (unsigned char *)sbi;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+	case FAULT_INFO_RATE:
+	case FAULT_INFO_TYPE:
+		return (unsigned char *)&sbi->fault_info;
+#endif
+	default:
+		return NULL;
+	}
+}
+
+/*
+ * Show handler for lifetime_write_kbytes: prints kbytes_written plus
+ * BD_PART_WRITTEN(), or "0" when the block device has no partition info.
+ */
+static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
+		struct f2fs_sb_info *sbi, char *buf)
+{
+	unsigned long long kbytes;
+
+	if (!sbi->sb->s_bdev->bd_part)
+		return snprintf(buf, PAGE_SIZE, "0\n");
+
+	kbytes = (unsigned long long)(sbi->kbytes_written +
+						BD_PART_WRITTEN(sbi));
+	return snprintf(buf, PAGE_SIZE, "%llu\n", kbytes);
+}
+
+/*
+ * Generic show handler: print the unsigned int found at a->offset inside the
+ * structure selected by a->struct_type.  Returns -EINVAL if that structure
+ * is not available.
+ */
+static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
+			struct f2fs_sb_info *sbi, char *buf)
+{
+	unsigned char *base = __struct_ptr(sbi, a->struct_type);
+
+	if (!base)
+		return -EINVAL;
+
+	return snprintf(buf, PAGE_SIZE, "%u\n",
+			*(unsigned int *)(base + a->offset));
+}
+
+/*
+ * Generic store handler: parse an unsigned number from @buf and write it to
+ * the unsigned int found at a->offset inside the structure selected by
+ * a->struct_type.
+ *
+ * Returns @count on success, -EINVAL if the structure is not available or
+ * the value fails an attribute specific check, -ERANGE if the value does not
+ * fit in an unsigned int, or the error from kstrtoul().
+ */
+static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
+			struct f2fs_sb_info *sbi,
+			const char *buf, size_t count)
+{
+	unsigned char *ptr;
+	unsigned long t;
+	unsigned int *ui;
+	ssize_t ret;
+
+	ptr = __struct_ptr(sbi, a->struct_type);
+	if (!ptr)
+		return -EINVAL;
+
+	ui = (unsigned int *)(ptr + a->offset);
+
+	ret = kstrtoul(skip_spaces(buf), 0, &t);
+	if (ret < 0)
+		return ret;
+	/*
+	 * The target field is an unsigned int: refuse values that would be
+	 * silently truncated on 64-bit instead of storing garbage.
+	 */
+	if (t != (unsigned int)t)
+		return -ERANGE;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+	if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
+		return -EINVAL;
+#endif
+	if (a->struct_type == RESERVED_BLOCKS) {
+		/* stat_lock keeps the block counters stable during the check */
+		spin_lock(&sbi->stat_lock);
+		/* 64-bit sum so the comparison cannot wrap on 32-bit */
+		if ((u64)sbi->total_valid_block_count + t >
+				(u64)sbi->user_block_count) {
+			spin_unlock(&sbi->stat_lock);
+			return -EINVAL;
+		}
+		*ui = t;
+		spin_unlock(&sbi->stat_lock);
+		return count;
+	}
+	*ui = t;
+	return count;
+}
+
+/*
+ * sysfs ->show entry point: recover the f2fs_attr and f2fs_sb_info from the
+ * generic objects and dispatch to the attribute's show handler.  Returns 0
+ * when the attribute has no show handler.
+ */
+static ssize_t f2fs_attr_show(struct kobject *kobj,
+				struct attribute *attr, char *buf)
+{
+	struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
+	struct f2fs_sb_info *sbi;
+
+	if (!a->show)
+		return 0;
+
+	sbi = container_of(kobj, struct f2fs_sb_info, s_kobj);
+	return a->show(a, sbi, buf);
+}
+
+/*
+ * sysfs ->store entry point: recover the f2fs_attr and f2fs_sb_info from the
+ * generic objects and dispatch to the attribute's store handler.  Returns 0
+ * when the attribute has no store handler.
+ */
+static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr,
+						const char *buf, size_t len)
+{
+	struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
+	struct f2fs_sb_info *sbi;
+
+	if (!a->store)
+		return 0;
+
+	sbi = container_of(kobj, struct f2fs_sb_info, s_kobj);
+	return a->store(a, sbi, buf, len);
+}
+
+/*
+ * kobject release callback: signal s_kobj_unregister so that a waiter in
+ * wait_for_completion() can proceed.
+ */
+static void f2fs_sb_release(struct kobject *kobj)
+{
+	complete(&container_of(kobj, struct f2fs_sb_info,
+					s_kobj)->s_kobj_unregister);
+}
+
+/*
+ * Define a static struct f2fs_attr named f2fs_attr_<_name> with the given
+ * mode, handlers, struct_type tag and field offset.
+ */
+#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
+static struct f2fs_attr f2fs_attr_##_name = {			\
+	.attr = {.name = __stringify(_name), .mode = _mode },	\
+	.show	= _show,					\
+	.store	= _store,					\
+	.struct_type = _struct_type,				\
+	.offset = _offset					\
+}
+
+/*
+ * Read/write (0644) attribute "name" backed by member "elname" of
+ * struct "struct_name", served by the generic f2fs_sbi_show/store handlers.
+ */
+#define F2FS_RW_ATTR(struct_type, struct_name, name, elname)	\
+	F2FS_ATTR_OFFSET(struct_type, name, 0644,		\
+		f2fs_sbi_show, f2fs_sbi_store,			\
+		offsetof(struct struct_name, elname))
+
+/* Read-only (0444) attribute served by a dedicated <name>_show() function. */
+#define F2FS_GENERAL_RO_ATTR(name) \
+static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL)
+
+/*
+ * Attribute instances.  Arguments of F2FS_RW_ATTR are: struct_type tag,
+ * structure name, sysfs file name, member name inside the structure.
+ */
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
+F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
+F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
+#endif
+F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
+
+/* Address of the generic attribute embedded in f2fs_attr_<name>. */
+#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
+/* NULL-terminated list of attributes, used as f2fs_ktype.default_attrs. */
+static struct attribute *f2fs_attrs[] = {
+	ATTR_LIST(gc_min_sleep_time),
+	ATTR_LIST(gc_max_sleep_time),
+	ATTR_LIST(gc_no_gc_sleep_time),
+	ATTR_LIST(gc_idle),
+	ATTR_LIST(reclaim_segments),
+	ATTR_LIST(max_small_discards),
+	ATTR_LIST(batched_trim_sections),
+	ATTR_LIST(ipu_policy),
+	ATTR_LIST(min_ipu_util),
+	ATTR_LIST(min_fsync_blocks),
+	ATTR_LIST(min_hot_blocks),
+	ATTR_LIST(max_victim_search),
+	ATTR_LIST(dir_level),
+	ATTR_LIST(ram_thresh),
+	ATTR_LIST(ra_nid_pages),
+	ATTR_LIST(dirty_nats_ratio),
+	ATTR_LIST(cp_interval),
+	ATTR_LIST(idle_interval),
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+	ATTR_LIST(inject_rate),
+	ATTR_LIST(inject_type),
+#endif
+	ATTR_LIST(lifetime_write_kbytes),
+	ATTR_LIST(reserved_blocks),
+	NULL,
+};
+
+/* sysfs read/write entry points shared by all f2fs attributes. */
+static const struct sysfs_ops f2fs_attr_ops = {
+	.show	= f2fs_attr_show,
+	.store	= f2fs_attr_store,
+};
+
+/* kobject type of the per-superblock s_kobj. */
+static struct kobj_type f2fs_ktype = {
+	.default_attrs	= f2fs_attrs,
+	.sysfs_ops	= &f2fs_attr_ops,
+	.release	= f2fs_sb_release,	/* completes s_kobj_unregister */
+};
+
+/*
+ * seq_file show handler for the "segment_info" proc file: prints, ten
+ * segments per row, each main-area segment's type and valid block count.
+ */
+static int segment_info_seq_show(struct seq_file *seq, void *offset)
+{
+	struct super_block *sb = seq->private;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	unsigned int total_segs;
+	int segno;
+
+	total_segs = le32_to_cpu(sbi->raw_super->segment_count_main);
+
+	seq_puts(seq, "format: segment_type|valid_blocks\n"
+		"segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
+
+	for (segno = 0; segno < total_segs; segno++) {
+		struct seg_entry *se = get_seg_entry(sbi, segno);
+		int col = segno % 10;
+
+		/* every row starts with the number of its first segment */
+		if (col == 0)
+			seq_printf(seq, "%-10d", segno);
+		seq_printf(seq, "%d|%-3u", se->type,
+					get_valid_blocks(sbi, segno, false));
+		/* end the row after ten entries or after the last segment */
+		if (col == 9 || segno == (total_segs - 1))
+			seq_putc(seq, '\n');
+		else
+			seq_putc(seq, ' ');
+	}
+
+	return 0;
+}
+
+/*
+ * seq_file show handler for the "segment_bits" proc file: prints one row per
+ * main-area segment with its type, valid block count and the bytes of its
+ * current valid-block bitmap in hex.
+ */
+static int segment_bits_seq_show(struct seq_file *seq, void *offset)
+{
+	struct super_block *sb = seq->private;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	unsigned int total_segs;
+	int segno;
+
+	total_segs = le32_to_cpu(sbi->raw_super->segment_count_main);
+
+	seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n"
+		"segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
+
+	for (segno = 0; segno < total_segs; segno++) {
+		struct seg_entry *se = get_seg_entry(sbi, segno);
+		int byte = 0;
+
+		seq_printf(seq, "%-10d", segno);
+		seq_printf(seq, "%d|%-3u|", se->type,
+					get_valid_blocks(sbi, segno, false));
+		while (byte < SIT_VBLOCK_MAP_SIZE) {
+			seq_printf(seq, " %.2x", se->cur_valid_map[byte]);
+			byte++;
+		}
+		seq_putc(seq, '\n');
+	}
+	return 0;
+}
+
+/*
+ * Generate, for <_name>_seq_show(), an open function that binds it through
+ * single_open() to the data stored with the proc entry, plus the matching
+ * file_operations named f2fs_seq_<_name>_fops.
+ */
+#define F2FS_PROC_FILE_DEF(_name)					\
+static int _name##_open_fs(struct inode *inode, struct file *file)	\
+{									\
+	return single_open(file, _name##_seq_show, PDE_DATA(inode));	\
+}									\
+									\
+static const struct file_operations f2fs_seq_##_name##_fops = {		\
+	.open = _name##_open_fs,					\
+	.read = seq_read,						\
+	.llseek = seq_lseek,						\
+	.release = single_release,					\
+};
+
+F2FS_PROC_FILE_DEF(segment_info);
+F2FS_PROC_FILE_DEF(segment_bits);
+
+/*
+ * Module-init helper: create the "f2fs" kset under fs_kobj and the
+ * /proc/fs/f2fs directory.
+ *
+ * Returns 0 on success or -ENOMEM if the kset cannot be created.  A missing
+ * proc directory is not an error; f2fs_proc_root is simply left NULL.
+ */
+int __init f2fs_register_sysfs(void)
+{
+	f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
+	if (!f2fs_kset)
+		return -ENOMEM;
+
+	/*
+	 * Create the proc directory only after the one step that can fail,
+	 * so an error return never leaves /proc/fs/f2fs behind.
+	 */
+	f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
+	return 0;
+}
+
+/* Undo f2fs_register_sysfs(): drop the "f2fs" kset and /proc/fs/f2fs. */
+void f2fs_unregister_sysfs(void)
+{
+	kset_unregister(f2fs_kset);
+	remove_proc_entry("fs/f2fs", NULL);
+}
+
+/*
+ * Per-mount setup: create /proc/fs/f2fs/<s_id>/{segment_info,segment_bits}
+ * (when the f2fs proc root exists) and register the superblock's kobject in
+ * the f2fs kset under the name <s_id>.
+ *
+ * Returns 0 on success or the error from kobject_init_and_add(); on failure
+ * everything created here has been torn down again.
+ */
+int f2fs_init_sysfs(struct f2fs_sb_info *sbi)
+{
+	struct super_block *sb = sbi->sb;
+	int err;
+
+	if (f2fs_proc_root)
+		sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
+
+	if (sbi->s_proc) {
+		proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
+				 &f2fs_seq_segment_info_fops, sb);
+		proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
+				 &f2fs_seq_segment_bits_fops, sb);
+	}
+
+	sbi->s_kobj.kset = f2fs_kset;
+	init_completion(&sbi->s_kobj_unregister);
+	err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
+							"%s", sb->s_id);
+	if (err)
+		goto err_out;
+	return 0;
+err_out:
+	/*
+	 * A failed kobject_init_and_add() still leaves an initialised
+	 * kobject behind; it must be dropped with kobject_put() so that its
+	 * name is freed.  The release callback completes
+	 * s_kobj_unregister, so wait for it before the caller frees sbi.
+	 */
+	kobject_put(&sbi->s_kobj);
+	wait_for_completion(&sbi->s_kobj_unregister);
+
+	if (sbi->s_proc) {
+		remove_proc_entry("segment_info", sbi->s_proc);
+		remove_proc_entry("segment_bits", sbi->s_proc);
+		remove_proc_entry(sb->s_id, f2fs_proc_root);
+		/* do not leave a pointer to the removed directory around */
+		sbi->s_proc = NULL;
+	}
+	return err;
+}
+
+/*
+ * Per-mount teardown: unregister the superblock's kobject, wait until its
+ * release callback has run, then remove the per-mount proc entries if they
+ * were created.
+ */
+void f2fs_exit_sysfs(struct f2fs_sb_info *sbi)
+{
+	struct kobject *kobj = &sbi->s_kobj;
+
+	kobject_del(kobj);
+	kobject_put(kobj);
+	wait_for_completion(&sbi->s_kobj_unregister);
+
+	if (!sbi->s_proc)
+		return;
+
+	remove_proc_entry("segment_info", sbi->s_proc);
+	remove_proc_entry("segment_bits", sbi->s_proc);
+	remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
+}

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8b426f8..245c430 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c

@@ -380,8 +380,8 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 		struct page *page = radix_tree_deref_slot_protected(slot,
 							&mapping->tree_lock);
 		if (likely(page) && PageDirty(page)) {
-			__dec_wb_stat(old_wb, WB_RECLAIMABLE);
-			__inc_wb_stat(new_wb, WB_RECLAIMABLE);
+			dec_wb_stat(old_wb, WB_RECLAIMABLE);
+			inc_wb_stat(new_wb, WB_RECLAIMABLE);
 		}
 	}
 
@@ -391,8 +391,8 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 							&mapping->tree_lock);
 		if (likely(page)) {
 			WARN_ON_ONCE(!PageWriteback(page));
-			__dec_wb_stat(old_wb, WB_WRITEBACK);
-			__inc_wb_stat(new_wb, WB_WRITEBACK);
+			dec_wb_stat(old_wb, WB_WRITEBACK);
+			inc_wb_stat(new_wb, WB_WRITEBACK);
 		}
 	}
 

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d44f545..5238861 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c

@@ -851,6 +851,16 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
 	return MIGRATEPAGE_SUCCESS;
 }
 
+/*
+ * ->error_remove_page handler for hugetlbfs: drop @page through
+ * remove_huge_page() and then have hugetlb_fix_reserve_counts() adjust the
+ * accounting of the inode that owns @mapping.  Always returns 0.
+ */
+static int hugetlbfs_error_remove_page(struct address_space *mapping,
+				struct page *page)
+{
+	/* Look up the owner before the page is removed. */
+	struct inode *host = mapping->host;
+
+	remove_huge_page(page);
+	hugetlb_fix_reserve_counts(host);
+
+	return 0;
+}
+
 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
@@ -966,6 +976,7 @@ static const struct address_space_operations hugetlbfs_aops = {
 	.write_end	= hugetlbfs_write_end,
 	.set_page_dirty	= hugetlbfs_set_page_dirty,
 	.migratepage    = hugetlbfs_migrate_page,
+	.error_remove_page	= hugetlbfs_error_remove_page,
 };
 
 

diff --git a/fs/iomap.c b/fs/iomap.c
index fa6cd5b..1732228 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c

@@ -584,6 +584,100 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
 }
 EXPORT_SYMBOL_GPL(iomap_fiemap);
 
+/*
+ * iomap_apply() actor for SEEK_HOLE.
+ *
+ * Returns @length to say "no hole in this extent, keep going", or 0 after
+ * storing the position of the hole in *(loff_t *)@data.  Unwritten extents
+ * are resolved through the page cache; any other non-hole extent counts as
+ * data.
+ */
+static loff_t
+iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
+		      void *data, struct iomap *iomap)
+{
+	loff_t *found = data;
+
+	if (iomap->type == IOMAP_UNWRITTEN) {
+		offset = page_cache_seek_hole_data(inode, offset, length,
+						   SEEK_HOLE);
+		/* No hole inside the unwritten range: skip all of it. */
+		if (offset < 0)
+			return length;
+	} else if (iomap->type != IOMAP_HOLE) {
+		return length;
+	}
+
+	*found = offset;
+	return 0;
+}
+
+/*
+ * Find the first hole at or after @offset by walking the file's extents
+ * with iomap_seek_hole_actor().
+ *
+ * Returns the position of the hole, the (advanced) offset once the walk has
+ * covered everything up to i_size without the actor reporting a hole,
+ * -ENXIO if @offset is at or beyond i_size, or a negative error from
+ * iomap_apply().
+ */
+loff_t
+iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
+{
+	loff_t eof = i_size_read(inode);
+	loff_t remaining;
+	loff_t done;
+
+	/* Nothing to be found beyond the end of the file. */
+	if (offset >= eof)
+		return -ENXIO;
+
+	for (remaining = eof - offset; remaining > 0; remaining -= done) {
+		/* The actor writes the hole position back into offset. */
+		done = iomap_apply(inode, offset, remaining, IOMAP_REPORT, ops,
+				   &offset, iomap_seek_hole_actor);
+		if (done < 0)
+			return done;
+		if (done == 0)
+			return offset;
+
+		offset += done;
+	}
+
+	return offset;
+}
+EXPORT_SYMBOL_GPL(iomap_seek_hole);
+
+/*
+ * iomap_apply() actor for SEEK_DATA.
+ *
+ * Returns @length to say "no data in this extent, keep going", or 0 after
+ * storing the position of the data in *(loff_t *)@data.  Holes never hold
+ * data, unwritten extents are resolved through the page cache, and every
+ * other extent type is data starting at @offset.
+ */
+static loff_t
+iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length,
+		      void *data, struct iomap *iomap)
+{
+	loff_t *found = data;
+
+	if (iomap->type == IOMAP_HOLE)
+		return length;
+
+	if (iomap->type == IOMAP_UNWRITTEN) {
+		offset = page_cache_seek_hole_data(inode, offset, length,
+						   SEEK_DATA);
+		/* No data inside the unwritten range: skip all of it. */
+		if (offset < 0)
+			return length;
+	}
+
+	*found = offset;
+	return 0;
+}
+
+/*
+ * Find the first data at or after @offset by walking the file's extents
+ * with iomap_seek_data_actor().
+ *
+ * Returns the position of the data, -ENXIO if @offset is at or beyond
+ * i_size or no data was found before the walk ran out, or a negative error
+ * from iomap_apply().
+ */
+loff_t
+iomap_seek_data(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
+{
+	loff_t eof = i_size_read(inode);
+	loff_t remaining;
+	loff_t done;
+
+	/* Nothing to be found beyond the end of the file. */
+	if (offset >= eof)
+		return -ENXIO;
+
+	for (remaining = eof - offset; remaining > 0; remaining -= done) {
+		/* The actor writes the data position back into offset. */
+		done = iomap_apply(inode, offset, remaining, IOMAP_REPORT, ops,
+				   &offset, iomap_seek_data_actor);
+		if (done < 0)
+			return done;
+		/* The actor stopped early, so part of the range is left. */
+		if (done == 0)
+			return offset;
+
+		offset += done;
+	}
+
+	/* The whole range was consumed without the actor stopping. */
+	return -ENXIO;
+}
+EXPORT_SYMBOL_GPL(iomap_seek_data);
+
 /*
  * Private flags for iomap_dio, must not overlap with the public ones in
  * iomap.h:

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index d3e40db..c349fc0 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c

@@ -381,8 +381,9 @@ static void encode_nlm4_lock(struct xdr_stream *xdr,
  */
 static void nlm4_xdr_enc_testargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nlm_args *args)
+				  const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -402,8 +403,9 @@ static void nlm4_xdr_enc_testargs(struct rpc_rqst *req,
  */
 static void nlm4_xdr_enc_lockargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nlm_args *args)
+				  const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -424,8 +426,9 @@ static void nlm4_xdr_enc_lockargs(struct rpc_rqst *req,
  */
 static void nlm4_xdr_enc_cancargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nlm_args *args)
+				  const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -442,8 +445,9 @@ static void nlm4_xdr_enc_cancargs(struct rpc_rqst *req,
  */
 static void nlm4_xdr_enc_unlockargs(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nlm_args *args)
+				    const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -458,8 +462,10 @@ static void nlm4_xdr_enc_unlockargs(struct rpc_rqst *req,
  */
 static void nlm4_xdr_enc_res(struct rpc_rqst *req,
 			     struct xdr_stream *xdr,
-			     const struct nlm_res *result)
+			     const void *data)
 {
+	const struct nlm_res *result = data;
+
 	encode_cookie(xdr, &result->cookie);
 	encode_nlm4_stat(xdr, result->status);
 }
@@ -479,8 +485,10 @@ static void nlm4_xdr_enc_res(struct rpc_rqst *req,
  */
 static void nlm4_xdr_enc_testres(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 const struct nlm_res *result)
+				 const void *data)
 {
+	const struct nlm_res *result = data;
+
 	encode_cookie(xdr, &result->cookie);
 	encode_nlm4_stat(xdr, result->status);
 	if (result->status == nlm_lck_denied)
@@ -525,8 +533,9 @@ static int decode_nlm4_testrply(struct xdr_stream *xdr,
 
 static int nlm4_xdr_dec_testres(struct rpc_rqst *req,
 				struct xdr_stream *xdr,
-				struct nlm_res *result)
+				void *data)
 {
+	struct nlm_res *result = data;
 	int error;
 
 	error = decode_cookie(xdr, &result->cookie);
@@ -545,8 +554,9 @@ static int nlm4_xdr_dec_testres(struct rpc_rqst *req,
  */
 static int nlm4_xdr_dec_res(struct rpc_rqst *req,
 			    struct xdr_stream *xdr,
-			    struct nlm_res *result)
+			    void *data)
 {
+	struct nlm_res *result = data;
 	int error;
 
 	error = decode_cookie(xdr, &result->cookie);
@@ -566,15 +576,15 @@ static int nlm4_xdr_dec_res(struct rpc_rqst *req,
 #define PROC(proc, argtype, restype)					\
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
-	.p_encode    = (kxdreproc_t)nlm4_xdr_enc_##argtype,		\
-	.p_decode    = (kxdrdproc_t)nlm4_xdr_dec_##restype,		\
+	.p_encode    = nlm4_xdr_enc_##argtype,				\
+	.p_decode    = nlm4_xdr_dec_##restype,				\
 	.p_arglen    = NLM4_##argtype##_sz,				\
 	.p_replen    = NLM4_##restype##_sz,				\
 	.p_statidx   = NLMPROC_##proc,					\
 	.p_name      = #proc,						\
 	}
 
-static struct rpc_procinfo	nlm4_procedures[] = {
+static const struct rpc_procinfo nlm4_procedures[] = {
 	PROC(TEST,		testargs,	testres),
 	PROC(LOCK,		lockargs,	res),
 	PROC(CANCEL,		cancargs,	res),
@@ -592,8 +602,10 @@ static struct rpc_procinfo	nlm4_procedures[] = {
 	PROC(GRANTED_RES,	res,		norep),
 };
 
+static unsigned int nlm_version4_counts[ARRAY_SIZE(nlm4_procedures)];
 const struct rpc_version nlm_version4 = {
 	.number		= 4,
 	.nrprocs	= ARRAY_SIZE(nlm4_procedures),
 	.procs		= nlm4_procedures,
+	.counts		= nlm_version4_counts,
 };

diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 3e9f787..3b4724a 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c

@@ -374,8 +374,9 @@ static void encode_nlm_lock(struct xdr_stream *xdr,
  */
 static void nlm_xdr_enc_testargs(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 const struct nlm_args *args)
+				 const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -395,8 +396,9 @@ static void nlm_xdr_enc_testargs(struct rpc_rqst *req,
  */
 static void nlm_xdr_enc_lockargs(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 const struct nlm_args *args)
+				 const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -417,8 +419,9 @@ static void nlm_xdr_enc_lockargs(struct rpc_rqst *req,
  */
 static void nlm_xdr_enc_cancargs(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 const struct nlm_args *args)
+				 const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -435,8 +438,9 @@ static void nlm_xdr_enc_cancargs(struct rpc_rqst *req,
  */
 static void nlm_xdr_enc_unlockargs(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nlm_args *args)
+				   const void *data)
 {
+	const struct nlm_args *args = data;
 	const struct nlm_lock *lock = &args->lock;
 
 	encode_cookie(xdr, &args->cookie);
@@ -451,8 +455,10 @@ static void nlm_xdr_enc_unlockargs(struct rpc_rqst *req,
  */
 static void nlm_xdr_enc_res(struct rpc_rqst *req,
 			    struct xdr_stream *xdr,
-			    const struct nlm_res *result)
+			    const void *data)
 {
+	const struct nlm_res *result = data;
+
 	encode_cookie(xdr, &result->cookie);
 	encode_nlm_stat(xdr, result->status);
 }
@@ -479,8 +485,10 @@ static void encode_nlm_testrply(struct xdr_stream *xdr,
 
 static void nlm_xdr_enc_testres(struct rpc_rqst *req,
 				struct xdr_stream *xdr,
-				const struct nlm_res *result)
+				const void *data)
 {
+	const struct nlm_res *result = data;
+
 	encode_cookie(xdr, &result->cookie);
 	encode_nlm_stat(xdr, result->status);
 	encode_nlm_testrply(xdr, result);
@@ -523,8 +531,9 @@ static int decode_nlm_testrply(struct xdr_stream *xdr,
 
 static int nlm_xdr_dec_testres(struct rpc_rqst *req,
 			       struct xdr_stream *xdr,
-			       struct nlm_res *result)
+			       void *data)
 {
+	struct nlm_res *result = data;
 	int error;
 
 	error = decode_cookie(xdr, &result->cookie);
@@ -543,8 +552,9 @@ static int nlm_xdr_dec_testres(struct rpc_rqst *req,
  */
 static int nlm_xdr_dec_res(struct rpc_rqst *req,
 			   struct xdr_stream *xdr,
-			   struct nlm_res *result)
+			   void *data)
 {
+	struct nlm_res *result = data;
 	int error;
 
 	error = decode_cookie(xdr, &result->cookie);
@@ -564,15 +574,15 @@ static int nlm_xdr_dec_res(struct rpc_rqst *req,
 #define PROC(proc, argtype, restype)	\
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
-	.p_encode    = (kxdreproc_t)nlm_xdr_enc_##argtype,		\
-	.p_decode    = (kxdrdproc_t)nlm_xdr_dec_##restype,		\
+	.p_encode    = nlm_xdr_enc_##argtype,		\
+	.p_decode    = nlm_xdr_dec_##restype,				\
 	.p_arglen    = NLM_##argtype##_sz,				\
 	.p_replen    = NLM_##restype##_sz,				\
 	.p_statidx   = NLMPROC_##proc,					\
 	.p_name      = #proc,						\
 	}
 
-static struct rpc_procinfo	nlm_procedures[] = {
+static const struct rpc_procinfo nlm_procedures[] = {
 	PROC(TEST,		testargs,	testres),
 	PROC(LOCK,		lockargs,	res),
 	PROC(CANCEL,		cancargs,	res),
@@ -590,16 +600,20 @@ static struct rpc_procinfo	nlm_procedures[] = {
 	PROC(GRANTED_RES,	res,		norep),
 };
 
+static unsigned int nlm_version1_counts[ARRAY_SIZE(nlm_procedures)];
 static const struct rpc_version	nlm_version1 = {
-		.number		= 1,
-		.nrprocs	= ARRAY_SIZE(nlm_procedures),
-		.procs		= nlm_procedures,
+	.number		= 1,
+	.nrprocs	= ARRAY_SIZE(nlm_procedures),
+	.procs		= nlm_procedures,
+	.counts		= nlm_version1_counts,
 };
 
+static unsigned int nlm_version3_counts[ARRAY_SIZE(nlm_procedures)];
 static const struct rpc_version	nlm_version3 = {
-		.number		= 3,
-		.nrprocs	= ARRAY_SIZE(nlm_procedures),
-		.procs		= nlm_procedures,
+	.number		= 3,
+	.nrprocs	= ARRAY_SIZE(nlm_procedures),
+	.procs		= nlm_procedures,
+	.counts		= nlm_version3_counts,
 };
 
 static const struct rpc_version	*nlm_versions[] = {
@@ -613,9 +627,9 @@ static const struct rpc_version	*nlm_versions[] = {
 static struct rpc_stat		nlm_rpc_stats;
 
 const struct rpc_program	nlm_program = {
-		.name		= "lockd",
-		.number		= NLM_PROGRAM,
-		.nrvers		= ARRAY_SIZE(nlm_versions),
-		.version	= nlm_versions,
-		.stats		= &nlm_rpc_stats,
+	.name		= "lockd",
+	.number		= NLM_PROGRAM,
+	.nrvers		= ARRAY_SIZE(nlm_versions),
+	.version	= nlm_versions,
+	.stats		= &nlm_rpc_stats,
 };

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 19166d4..9d8166c 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c

@@ -476,22 +476,23 @@ static void encode_priv(struct xdr_stream *xdr, const struct nsm_args *argp)
 }
 
 static void nsm_xdr_enc_mon(struct rpc_rqst *req, struct xdr_stream *xdr,
-			    const struct nsm_args *argp)
+			    const void *argp)
 {
 	encode_mon_id(xdr, argp);
 	encode_priv(xdr, argp);
 }
 
 static void nsm_xdr_enc_unmon(struct rpc_rqst *req, struct xdr_stream *xdr,
-			      const struct nsm_args *argp)
+			      const void *argp)
 {
 	encode_mon_id(xdr, argp);
 }
 
 static int nsm_xdr_dec_stat_res(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct nsm_res *resp)
+				void *data)
 {
+	struct nsm_res *resp = data;
 	__be32 *p;
 
 	p = xdr_inline_decode(xdr, 4 + 4);
@@ -507,8 +508,9 @@ static int nsm_xdr_dec_stat_res(struct rpc_rqst *rqstp,
 
 static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
 			    struct xdr_stream *xdr,
-			    struct nsm_res *resp)
+			    void *data)
 {
+	struct nsm_res *resp = data;
 	__be32 *p;
 
 	p = xdr_inline_decode(xdr, 4);
@@ -529,11 +531,11 @@ static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
 #define SM_monres_sz	2
 #define SM_unmonres_sz	1
 
-static struct rpc_procinfo	nsm_procedures[] = {
+static const struct rpc_procinfo nsm_procedures[] = {
 [NSMPROC_MON] = {
 		.p_proc		= NSMPROC_MON,
-		.p_encode	= (kxdreproc_t)nsm_xdr_enc_mon,
-		.p_decode	= (kxdrdproc_t)nsm_xdr_dec_stat_res,
+		.p_encode	= nsm_xdr_enc_mon,
+		.p_decode	= nsm_xdr_dec_stat_res,
 		.p_arglen	= SM_mon_sz,
 		.p_replen	= SM_monres_sz,
 		.p_statidx	= NSMPROC_MON,
@@ -541,8 +543,8 @@ static struct rpc_procinfo	nsm_procedures[] = {
 	},
 [NSMPROC_UNMON] = {
 		.p_proc		= NSMPROC_UNMON,
-		.p_encode	= (kxdreproc_t)nsm_xdr_enc_unmon,
-		.p_decode	= (kxdrdproc_t)nsm_xdr_dec_stat,
+		.p_encode	= nsm_xdr_enc_unmon,
+		.p_decode	= nsm_xdr_dec_stat,
 		.p_arglen	= SM_mon_id_sz,
 		.p_replen	= SM_unmonres_sz,
 		.p_statidx	= NSMPROC_UNMON,
@@ -550,10 +552,12 @@ static struct rpc_procinfo	nsm_procedures[] = {
 	},
 };
 
+static unsigned int nsm_version1_counts[ARRAY_SIZE(nsm_procedures)];
 static const struct rpc_version nsm_version1 = {
-		.number		= 1,
-		.nrprocs	= ARRAY_SIZE(nsm_procedures),
-		.procs		= nsm_procedures
+	.number		= 1,
+	.nrprocs	= ARRAY_SIZE(nsm_procedures),
+	.procs		= nsm_procedures,
+	.counts		= nsm_version1_counts,
 };
 
 static const struct rpc_version *nsm_version[] = {
@@ -563,9 +567,9 @@ static const struct rpc_version *nsm_version[] = {
 static struct rpc_stat		nsm_stats;
 
 static const struct rpc_program nsm_program = {
-		.name		= "statd",
-		.number		= NSM_PROGRAM,
-		.nrvers		= ARRAY_SIZE(nsm_version),
-		.version	= nsm_version,
-		.stats		= &nsm_stats
+	.name		= "statd",
+	.number		= NSM_PROGRAM,
+	.nrvers		= ARRAY_SIZE(nsm_version),
+	.version	= nsm_version,
+	.stats		= &nsm_stats
 };

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 5d481e8..726b6ce 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c

@@ -739,27 +739,33 @@ module_exit(exit_nlm);
 /*
  * Define NLM program and procedures
  */
-static struct svc_version	nlmsvc_version1 = {
-		.vs_vers	= 1,
-		.vs_nproc	= 17,
-		.vs_proc	= nlmsvc_procedures,
-		.vs_xdrsize	= NLMSVC_XDRSIZE,
+static unsigned int nlmsvc_version1_count[17];
+static const struct svc_version	nlmsvc_version1 = {
+	.vs_vers	= 1,
+	.vs_nproc	= 17,
+	.vs_proc	= nlmsvc_procedures,
+	.vs_count	= nlmsvc_version1_count,
+	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
-static struct svc_version	nlmsvc_version3 = {
-		.vs_vers	= 3,
-		.vs_nproc	= 24,
-		.vs_proc	= nlmsvc_procedures,
-		.vs_xdrsize	= NLMSVC_XDRSIZE,
+static unsigned int nlmsvc_version3_count[24];
+static const struct svc_version	nlmsvc_version3 = {
+	.vs_vers	= 3,
+	.vs_nproc	= 24,
+	.vs_proc	= nlmsvc_procedures,
+	.vs_count	= nlmsvc_version3_count,
+	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #ifdef CONFIG_LOCKD_V4
-static struct svc_version	nlmsvc_version4 = {
-		.vs_vers	= 4,
-		.vs_nproc	= 24,
-		.vs_proc	= nlmsvc_procedures4,
-		.vs_xdrsize	= NLMSVC_XDRSIZE,
+static unsigned int nlmsvc_version4_count[24];
+static const struct svc_version	nlmsvc_version4 = {
+	.vs_vers	= 4,
+	.vs_nproc	= 24,
+	.vs_proc	= nlmsvc_procedures4,
+	.vs_count	= nlmsvc_version4_count,
+	.vs_xdrsize	= NLMSVC_XDRSIZE,
 };
 #endif
-static struct svc_version *	nlmsvc_version[] = {
+static const struct svc_version *nlmsvc_version[] = {
 	[1] = &nlmsvc_version1,
 	[3] = &nlmsvc_version3,
 #ifdef CONFIG_LOCKD_V4

diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 09c576f..82925f1 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c

@@ -62,7 +62,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
  * NULL: Test for presence of service
  */
 static __be32
-nlm4svc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nlm4svc_proc_null(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: NULL          called\n");
 	return rpc_success;
@@ -72,9 +72,9 @@ nlm4svc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
  * TEST: Check for conflicting lock
  */
 static __be32
-nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+__nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -99,9 +99,15 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+nlm4svc_proc_test(struct svc_rqst *rqstp)
 {
+	return __nlm4svc_proc_test(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
+{
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -141,9 +147,15 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+nlm4svc_proc_lock(struct svc_rqst *rqstp)
 {
+	return __nlm4svc_proc_lock(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
+{
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -170,13 +182,19 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+/*
+ * CANCEL entry point taking only the request: forwards to
+ * __nlm4svc_proc_cancel() with the request's own rq_resp as the result.
+ */
+static __be32
+nlm4svc_proc_cancel(struct svc_rqst *rqstp)
+{
+	struct nlm_res *resp = rqstp->rq_resp;
+
+	return __nlm4svc_proc_cancel(rqstp, resp);
+}
+
 /*
  * UNLOCK: release a lock
  */
 static __be32
-nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+__nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -203,14 +221,21 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+/*
+ * UNLOCK entry point taking only the request: forwards to
+ * __nlm4svc_proc_unlock() with the request's own rq_resp as the result.
+ */
+static __be32
+nlm4svc_proc_unlock(struct svc_rqst *rqstp)
+{
+	struct nlm_res *resp = rqstp->rq_resp;
+
+	return __nlm4svc_proc_unlock(rqstp, resp);
+}
+
 /*
  * GRANTED: A server calls us to tell that a process' lock request
  * was granted
  */
 static __be32
-nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+__nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	resp->cookie = argp->cookie;
 
 	dprintk("lockd: GRANTED       called\n");
@@ -219,6 +244,12 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+/*
+ * GRANTED entry point taking only the request: forwards to
+ * __nlm4svc_proc_granted() with the request's own rq_resp as the result.
+ */
+static __be32
+nlm4svc_proc_granted(struct svc_rqst *rqstp)
+{
+	struct nlm_res *resp = rqstp->rq_resp;
+
+	return __nlm4svc_proc_granted(rqstp, resp);
+}
+
 /*
  * This is the generic lockd callback for async RPC calls
  */
@@ -243,9 +274,10 @@ static const struct rpc_call_ops nlm4svc_callback_ops = {
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
-		__be32 (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
+static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc,
+		__be32 (*func)(struct svc_rqst *,  struct nlm_res *))
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_rqst	*call;
 	__be32 stat;
@@ -261,7 +293,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	if (call == NULL)
 		return rpc_system_err;
 
-	stat = func(rqstp, argp, &call->a_res);
+	stat = func(rqstp, &call->a_res);
 	if (stat != 0) {
 		nlmsvc_release_call(call);
 		return stat;
@@ -273,48 +305,44 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	return rpc_success;
 }
 
-static __be32 nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlm4svc_proc_test_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: TEST_MSG      called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, argp, nlm4svc_proc_test);
+	return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, __nlm4svc_proc_test);
 }
 
-static __be32 nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlm4svc_proc_lock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: LOCK_MSG      called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlm4svc_proc_lock);
+	return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, __nlm4svc_proc_lock);
 }
 
-static __be32 nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					       void	       *resp)
+static __be32 nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: CANCEL_MSG    called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlm4svc_proc_cancel);
+	return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, __nlm4svc_proc_cancel);
 }
 
-static __be32 nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                               void            *resp)
+static __be32 nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: UNLOCK_MSG    called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlm4svc_proc_unlock);
+	return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, __nlm4svc_proc_unlock);
 }
 
-static __be32 nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                                void            *resp)
+static __be32 nlm4svc_proc_granted_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: GRANTED_MSG   called\n");
-	return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlm4svc_proc_granted);
+	return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, __nlm4svc_proc_granted);
 }
 
 /*
  * SHARE: create a DOS share or alter existing share.
  */
 static __be32
-nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
-				          struct nlm_res  *resp)
+nlm4svc_proc_share(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -345,9 +373,10 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
  * UNSHARE: Release a DOS share.
  */
 static __be32
-nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlm4svc_proc_unshare(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -378,22 +407,23 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
  * NM_LOCK: Create an unmonitored lock
  */
 static __be32
-nlm4svc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlm4svc_proc_nm_lock(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	dprintk("lockd: NM_LOCK       called\n");
 
 	argp->monitor = 0;		/* just clean the monitor flag */
-	return nlm4svc_proc_lock(rqstp, argp, resp);
+	return nlm4svc_proc_lock(rqstp);
 }
 
 /*
  * FREE_ALL: Release all locks and shares held by client
  */
 static __be32
-nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void            *resp)
+nlm4svc_proc_free_all(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 
 	/* Obtain client */
@@ -409,9 +439,10 @@ nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
  * SM_NOTIFY: private callback from statd (not part of official NLM proto)
  */
 static __be32
-nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
-					      void	        *resp)
+nlm4svc_proc_sm_notify(struct svc_rqst *rqstp)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	dprintk("lockd: SM_NOTIFY     called\n");
 
 	if (!nlm_privileged_requester(rqstp)) {
@@ -429,9 +460,10 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
  * client sent a GRANTED_RES, let's remove the associated block
  */
 static __be32
-nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
-                                                void            *resp)
+nlm4svc_proc_granted_res(struct svc_rqst *rqstp)
 {
+	struct nlm_res *argp = rqstp->rq_argp;
+
         if (!nlmsvc_ops)
                 return rpc_success;
 
@@ -463,9 +495,9 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
- { .pc_func	= (svc_procfunc) nlm4svc_proc_##name,	\
-   .pc_decode	= (kxdrproc_t) nlm4svc_decode_##xargt,	\
-   .pc_encode	= (kxdrproc_t) nlm4svc_encode_##xrest,	\
+ { .pc_func	= nlm4svc_proc_##name,	\
+   .pc_decode	= nlm4svc_decode_##xargt,	\
+   .pc_encode	= nlm4svc_encode_##xrest,	\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
    .pc_ressize	= sizeof(struct nlm_##rest),		\
@@ -475,7 +507,7 @@ struct nlm_void			{ int dummy; };
 #define	No	(1+1024/4)				/* netobj */
 #define	St	1					/* status */
 #define	Rg	4					/* range (offset + length) */
-struct svc_procedure		nlmsvc_procedures4[] = {
+const struct svc_procedure nlmsvc_procedures4[] = {
   PROC(null,		void,		void,		void,	void, 1),
   PROC(test,		testargs,	testres,	args,	res, Ck+St+2+No+Rg),
   PROC(lock,		lockargs,	res,		args,	res, Ck+St),

diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index fb26b9f..0791516 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c

@@ -92,7 +92,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
  * NULL: Test for presence of service
  */
 static __be32
-nlmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nlmsvc_proc_null(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: NULL          called\n");
 	return rpc_success;
@@ -102,9 +102,9 @@ nlmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
  * TEST: Check for conflicting lock
  */
 static __be32
-nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+__nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -130,9 +130,15 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				         struct nlm_res  *resp)
+nlmsvc_proc_test(struct svc_rqst *rqstp)
 {
+	return __nlmsvc_proc_test(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
+{
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	__be32 rc = rpc_success;
@@ -172,9 +178,15 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
 }
 
 static __be32
-nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+nlmsvc_proc_lock(struct svc_rqst *rqstp)
 {
+	return __nlmsvc_proc_lock(rqstp, rqstp->rq_resp);
+}
+
+static __be32
+__nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
+{
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	struct net *net = SVC_NET(rqstp);
@@ -202,13 +214,19 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+/*
+ * CANCEL entry point taking only the request: forwards to
+ * __nlmsvc_proc_cancel() with the request's own rq_resp as the result.
+ */
+static __be32
+nlmsvc_proc_cancel(struct svc_rqst *rqstp)
+{
+	struct nlm_res *resp = rqstp->rq_resp;
+
+	return __nlmsvc_proc_cancel(rqstp, resp);
+}
+
 /*
  * UNLOCK: release a lock
  */
 static __be32
-nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				           struct nlm_res  *resp)
+__nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 	struct net *net = SVC_NET(rqstp);
@@ -236,14 +254,21 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+/*
+ * UNLOCK entry point taking only the request: forwards to
+ * __nlmsvc_proc_unlock() with the request's own rq_resp as the result.
+ */
+static __be32
+nlmsvc_proc_unlock(struct svc_rqst *rqstp)
+{
+	struct nlm_res *resp = rqstp->rq_resp;
+
+	return __nlmsvc_proc_unlock(rqstp, resp);
+}
+
 /*
  * GRANTED: A server calls us to tell that a process' lock request
  * was granted
  */
 static __be32
-nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+__nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_res *resp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	resp->cookie = argp->cookie;
 
 	dprintk("lockd: GRANTED       called\n");
@@ -252,6 +277,12 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	return rpc_success;
 }
 
+/*
+ * GRANTED entry point taking only the request: forwards to
+ * __nlmsvc_proc_granted() with the request's own rq_resp as the result.
+ */
+static __be32
+nlmsvc_proc_granted(struct svc_rqst *rqstp)
+{
+	struct nlm_res *resp = rqstp->rq_resp;
+
+	return __nlmsvc_proc_granted(rqstp, resp);
+}
+
 /*
  * This is the generic lockd callback for async RPC calls
  */
@@ -284,9 +315,10 @@ static const struct rpc_call_ops nlmsvc_callback_ops = {
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
-		__be32 (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
+static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc,
+		__be32 (*func)(struct svc_rqst *, struct nlm_res *))
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 	struct nlm_rqst	*call;
 	__be32 stat;
@@ -302,7 +334,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	if (call == NULL)
 		return rpc_system_err;
 
-	stat = func(rqstp, argp, &call->a_res);
+	stat = func(rqstp, &call->a_res);
 	if (stat != 0) {
 		nlmsvc_release_call(call);
 		return stat;
@@ -314,50 +346,46 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
 	return rpc_success;
 }
 
-static __be32 nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlmsvc_proc_test_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: TEST_MSG      called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, argp, nlmsvc_proc_test);
+	return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, __nlmsvc_proc_test);
 }
 
-static __be32 nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static __be32 nlmsvc_proc_lock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: LOCK_MSG      called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlmsvc_proc_lock);
+	return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, __nlmsvc_proc_lock);
 }
 
-static __be32 nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					       void	       *resp)
+static __be32 nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: CANCEL_MSG    called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlmsvc_proc_cancel);
+	return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, __nlmsvc_proc_cancel);
 }
 
 static __be32
-nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                               void            *resp)
+nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: UNLOCK_MSG    called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlmsvc_proc_unlock);
+	return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, __nlmsvc_proc_unlock);
 }
 
 static __be32
-nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                                void            *resp)
+nlmsvc_proc_granted_msg(struct svc_rqst *rqstp)
 {
 	dprintk("lockd: GRANTED_MSG   called\n");
-	return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlmsvc_proc_granted);
+	return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, __nlmsvc_proc_granted);
 }
 
 /*
  * SHARE: create a DOS share or alter existing share.
  */
 static __be32
-nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
-				          struct nlm_res  *resp)
+nlmsvc_proc_share(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -388,9 +416,10 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
  * UNSHARE: Release a DOS share.
  */
 static __be32
-nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlmsvc_proc_unshare(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+	struct nlm_res *resp = rqstp->rq_resp;
 	struct nlm_host	*host;
 	struct nlm_file	*file;
 
@@ -421,22 +450,23 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
  * NM_LOCK: Create an unmonitored lock
  */
 static __be32
-nlmsvc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
-				            struct nlm_res  *resp)
+nlmsvc_proc_nm_lock(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	dprintk("lockd: NM_LOCK       called\n");
 
 	argp->monitor = 0;		/* just clean the monitor flag */
-	return nlmsvc_proc_lock(rqstp, argp, resp);
+	return nlmsvc_proc_lock(rqstp);
 }
 
 /*
  * FREE_ALL: Release all locks and shares held by client
  */
 static __be32
-nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void            *resp)
+nlmsvc_proc_free_all(struct svc_rqst *rqstp)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_host	*host;
 
 	/* Obtain client */
@@ -452,9 +482,10 @@ nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp,
  * SM_NOTIFY: private callback from statd (not part of official NLM proto)
  */
 static __be32
-nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
-					      void	        *resp)
+nlmsvc_proc_sm_notify(struct svc_rqst *rqstp)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	dprintk("lockd: SM_NOTIFY     called\n");
 
 	if (!nlm_privileged_requester(rqstp)) {
@@ -472,9 +503,10 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
  * client sent a GRANTED_RES, let's remove the associated block
  */
 static __be32
-nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
-                                                void            *resp)
+nlmsvc_proc_granted_res(struct svc_rqst *rqstp)
 {
+	struct nlm_res *argp = rqstp->rq_argp;
+
 	if (!nlmsvc_ops)
 		return rpc_success;
 
@@ -505,9 +537,9 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 struct nlm_void			{ int dummy; };
 
 #define PROC(name, xargt, xrest, argt, rest, respsize)	\
- { .pc_func	= (svc_procfunc) nlmsvc_proc_##name,	\
-   .pc_decode	= (kxdrproc_t) nlmsvc_decode_##xargt,	\
-   .pc_encode	= (kxdrproc_t) nlmsvc_encode_##xrest,	\
+ { .pc_func	= nlmsvc_proc_##name,			\
+   .pc_decode	= nlmsvc_decode_##xargt,		\
+   .pc_encode	= nlmsvc_encode_##xrest,		\
    .pc_release	= NULL,					\
    .pc_argsize	= sizeof(struct nlm_##argt),		\
    .pc_ressize	= sizeof(struct nlm_##rest),		\
@@ -519,7 +551,7 @@ struct nlm_void			{ int dummy; };
 #define	No	(1+1024/4)			/* Net Obj */
 #define	Rg	2				/* range - offset + size */
 
-struct svc_procedure		nlmsvc_procedures[] = {
+const struct svc_procedure nlmsvc_procedures[] = {
   PROC(null,		void,		void,		void,	void, 1),
   PROC(test,		testargs,	testres,	args,	res, Ck+St+2+No+Rg),
   PROC(lock,		lockargs,	res,		args,	res, Ck+St),

diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 5b651da..442bbd0 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c

@@ -182,8 +182,9 @@ nlm_encode_testres(__be32 *p, struct nlm_res *resp)
  * First, the server side XDR functions
  */
 int
-nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
@@ -199,16 +200,19 @@ nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm_encode_testres(p, resp)))
 		return 0;
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
@@ -227,8 +231,9 @@ nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie)))
@@ -243,8 +248,10 @@ nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	if (!(p = nlm_decode_cookie(p, &argp->cookie))
 	 || !(p = nlm_decode_lock(p, &argp->lock)))
 		return 0;
@@ -253,8 +260,9 @@ nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	memset(lock, 0, sizeof(*lock));
@@ -274,8 +282,10 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -284,8 +294,10 @@ nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -293,8 +305,9 @@ nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
+nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	if (!(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -305,8 +318,10 @@ nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
 }
 
 int
-nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
+nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
 		return 0;
 	argp->state = ntohl(*p++);
@@ -316,8 +331,10 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
 }
 
 int
-nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_argp;
+
 	if (!(p = nlm_decode_cookie(p, &resp->cookie)))
 		return 0;
 	resp->status = *p++;
@@ -325,13 +342,13 @@ nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
 
 int
-nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }

diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index dfa4789..2a0cd56 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c

@@ -179,8 +179,9 @@ nlm4_encode_testres(__be32 *p, struct nlm_res *resp)
  * First, the server side XDR functions
  */
 int
-nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
@@ -196,16 +197,19 @@ nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm4_encode_testres(p, resp)))
 		return 0;
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
@@ -224,8 +228,9 @@ nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	u32	exclusive;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie)))
@@ -240,8 +245,10 @@ nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
+
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie))
 	 || !(p = nlm4_decode_lock(p, &argp->lock)))
 		return 0;
@@ -250,8 +257,9 @@ nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
+nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	memset(lock, 0, sizeof(*lock));
@@ -271,8 +279,10 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp)
 }
 
 int
-nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -281,8 +291,10 @@ nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_resp;
+
 	if (!(p = nlm4_encode_cookie(p, &resp->cookie)))
 		return 0;
 	*p++ = resp->status;
@@ -290,8 +302,9 @@ nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
+nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	if (!(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -302,8 +315,10 @@ nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp)
 }
 
 int
-nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
+nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_reboot *argp = rqstp->rq_argp;
+
 	if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
 		return 0;
 	argp->state = ntohl(*p++);
@@ -313,8 +328,10 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp
 }
 
 int
-nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
+nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nlm_res *resp = rqstp->rq_argp;
+
 	if (!(p = nlm4_decode_cookie(p, &resp->cookie)))
 		return 0;
 	resp->status = *p++;
@@ -322,13 +339,13 @@ nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp)
 }
 
 int
-nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
 
 int
-nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }

diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 98f4e57..1fb1189 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile

@@ -7,7 +7,7 @@
 CFLAGS_nfstrace.o += -I$(src)
 nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o \
 			   io.o direct.o pagelist.o read.o symlink.o unlink.o \
-			   write.o namespace.o mount_clnt.o nfstrace.o
+			   write.o namespace.o mount_clnt.o nfstrace.o export.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o
 nfs-$(CONFIG_SYSCTL)	+= sysctl.o
 nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 73a1f92..3432387 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c

@@ -439,7 +439,7 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 /*
  * Define NFS4 callback program
  */
-static struct svc_version *nfs4_callback_version[] = {
+static const struct svc_version *nfs4_callback_version[] = {
 	[1] = &nfs4_callback_version1,
 	[4] = &nfs4_callback_version4,
 };

diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index c701c30..3dc54d7 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h

@@ -114,8 +114,7 @@ struct cb_sequenceres {
 	uint32_t			csr_target_highestslotid;
 };
 
-extern __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
-				       struct cb_sequenceres *res,
+extern __be32 nfs4_callback_sequence(void *argp, void *resp,
 				       struct cb_process_state *cps);
 
 #define RCA4_TYPE_MASK_RDATA_DLG	0
@@ -134,15 +133,13 @@ struct cb_recallanyargs {
 	uint32_t	craa_type_mask;
 };
 
-extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args,
-					void *dummy,
+extern __be32 nfs4_callback_recallany(void *argp, void *resp,
 					struct cb_process_state *cps);
 
 struct cb_recallslotargs {
 	uint32_t	crsa_target_highest_slotid;
 };
-extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args,
-					 void *dummy,
+extern __be32 nfs4_callback_recallslot(void *argp, void *resp,
 					 struct cb_process_state *cps);
 
 struct cb_layoutrecallargs {
@@ -159,9 +156,8 @@ struct cb_layoutrecallargs {
 	};
 };
 
-extern __be32 nfs4_callback_layoutrecall(
-	struct cb_layoutrecallargs *args,
-	void *dummy, struct cb_process_state *cps);
+extern __be32 nfs4_callback_layoutrecall(void *argp, void *resp,
+		struct cb_process_state *cps);
 
 struct cb_devicenotifyitem {
 	uint32_t		cbd_notify_type;
@@ -175,9 +171,8 @@ struct cb_devicenotifyargs {
 	struct cb_devicenotifyitem	 *devs;
 };
 
-extern __be32 nfs4_callback_devicenotify(
-	struct cb_devicenotifyargs *args,
-	void *dummy, struct cb_process_state *cps);
+extern __be32 nfs4_callback_devicenotify(void *argp, void *resp,
+		struct cb_process_state *cps);
 
 struct cb_notify_lock_args {
 	struct nfs_fh			cbnl_fh;
@@ -185,15 +180,13 @@ struct cb_notify_lock_args {
 	bool				cbnl_valid;
 };
 
-extern __be32 nfs4_callback_notify_lock(struct cb_notify_lock_args *args,
-					 void *dummy,
+extern __be32 nfs4_callback_notify_lock(void *argp, void *resp,
 					 struct cb_process_state *cps);
 #endif /* CONFIG_NFS_V4_1 */
 extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *);
-extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
-				    struct cb_getattrres *res,
+extern __be32 nfs4_callback_getattr(void *argp, void *resp,
 				    struct cb_process_state *cps);
-extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+extern __be32 nfs4_callback_recall(void *argp, void *resp,
 				   struct cb_process_state *cps);
 #if IS_ENABLED(CONFIG_NFS_V4)
 extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 52479f1..5427cdf 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c

@@ -19,10 +19,11 @@
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
 
-__be32 nfs4_callback_getattr(struct cb_getattrargs *args,
-			     struct cb_getattrres *res,
+__be32 nfs4_callback_getattr(void *argp, void *resp,
 			     struct cb_process_state *cps)
 {
+	struct cb_getattrargs *args = argp;
+	struct cb_getattrres *res = resp;
 	struct nfs_delegation *delegation;
 	struct nfs_inode *nfsi;
 	struct inode *inode;
@@ -68,9 +69,10 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
 	return res->status;
 }
 
-__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+__be32 nfs4_callback_recall(void *argp, void *resp,
 			    struct cb_process_state *cps)
 {
+	struct cb_recallargs *args = argp;
 	struct inode *inode;
 	__be32 res;
 	
@@ -324,9 +326,10 @@ static u32 do_callback_layoutrecall(struct nfs_client *clp,
 	return initiate_bulk_draining(clp, args);
 }
 
-__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
-				  void *dummy, struct cb_process_state *cps)
+__be32 nfs4_callback_layoutrecall(void *argp, void *resp,
+				  struct cb_process_state *cps)
 {
+	struct cb_layoutrecallargs *args = argp;
 	u32 res = NFS4ERR_OP_NOT_IN_SESSION;
 
 	if (cps->clp)
@@ -345,9 +348,10 @@ static void pnfs_recall_all_layouts(struct nfs_client *clp)
 	do_callback_layoutrecall(clp, &args);
 }
 
-__be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
-				  void *dummy, struct cb_process_state *cps)
+__be32 nfs4_callback_devicenotify(void *argp, void *resp,
+				  struct cb_process_state *cps)
 {
+	struct cb_devicenotifyargs *args = argp;
 	int i;
 	__be32 res = 0;
 	struct nfs_client *clp = cps->clp;
@@ -469,10 +473,11 @@ static bool referring_call_exists(struct nfs_client *clp,
 	return status;
 }
 
-__be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
-			      struct cb_sequenceres *res,
+__be32 nfs4_callback_sequence(void *argp, void *resp,
 			      struct cb_process_state *cps)
 {
+	struct cb_sequenceargs *args = argp;
+	struct cb_sequenceres *res = resp;
 	struct nfs4_slot_table *tbl;
 	struct nfs4_slot *slot;
 	struct nfs_client *clp;
@@ -571,9 +576,10 @@ validate_bitmap_values(unsigned long mask)
 	return (mask & ~RCA4_TYPE_MASK_ALL) == 0;
 }
 
-__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
+__be32 nfs4_callback_recallany(void *argp, void *resp,
 			       struct cb_process_state *cps)
 {
+	struct cb_recallanyargs *args = argp;
 	__be32 status;
 	fmode_t flags = 0;
 
@@ -606,9 +612,10 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
 }
 
 /* Reduce the fore channel's max_slots to the target value */
-__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
+__be32 nfs4_callback_recallslot(void *argp, void *resp,
 				struct cb_process_state *cps)
 {
+	struct cb_recallslotargs *args = argp;
 	struct nfs4_slot_table *fc_tbl;
 	__be32 status;
 
@@ -631,9 +638,11 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
 	return status;
 }
 
-__be32 nfs4_callback_notify_lock(struct cb_notify_lock_args *args, void *dummy,
+__be32 nfs4_callback_notify_lock(void *argp, void *resp,
 				 struct cb_process_state *cps)
 {
+	struct cb_notify_lock_args *args = argp;
+
 	if (!cps->clp) /* set in cb_sequence */
 		return htonl(NFS4ERR_OP_NOT_IN_SESSION);
 

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 390ac9c..681dd64 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c

@@ -43,32 +43,27 @@
 /* Internal error code */
 #define NFS4ERR_RESOURCE_HDR	11050
 
-typedef __be32 (*callback_process_op_t)(void *, void *,
-					struct cb_process_state *);
-typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
-typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
-
-
 struct callback_op {
-	callback_process_op_t process_op;
-	callback_decode_arg_t decode_args;
-	callback_encode_res_t encode_res;
+	__be32 (*process_op)(void *, void *, struct cb_process_state *);
+	__be32 (*decode_args)(struct svc_rqst *, struct xdr_stream *, void *);
+	__be32 (*encode_res)(struct svc_rqst *, struct xdr_stream *,
+			const void *);
 	long res_maxsize;
 };
 
 static struct callback_op callback_ops[];
 
-static __be32 nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp)
+static __be32 nfs4_callback_null(struct svc_rqst *rqstp)
 {
 	return htonl(NFS4_OK);
 }
 
-static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
 
-static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
@@ -184,8 +179,10 @@ static __be32 decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
 	return 0;
 }
 
-static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_getattrargs *args)
+static __be32 decode_getattr_args(struct svc_rqst *rqstp,
+		struct xdr_stream *xdr, void *argp)
 {
+	struct cb_getattrargs *args = argp;
 	__be32 status;
 
 	status = decode_fh(xdr, &args->fh);
@@ -194,8 +191,10 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr
 	return decode_bitmap(xdr, args->bitmap);
 }
 
-static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
+static __be32 decode_recall_args(struct svc_rqst *rqstp,
+		struct xdr_stream *xdr, void *argp)
 {
+	struct cb_recallargs *args = argp;
 	__be32 *p;
 	__be32 status;
 
@@ -217,9 +216,9 @@ static __be32 decode_layout_stateid(struct xdr_stream *xdr, nfs4_stateid *statei
 }
 
 static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
-				       struct xdr_stream *xdr,
-				       struct cb_layoutrecallargs *args)
+				       struct xdr_stream *xdr, void *argp)
 {
+	struct cb_layoutrecallargs *args = argp;
 	__be32 *p;
 	__be32 status = 0;
 	uint32_t iomode;
@@ -262,8 +261,9 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
 static
 __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct cb_devicenotifyargs *args)
+				void *argp)
 {
+	struct cb_devicenotifyargs *args = argp;
 	__be32 *p;
 	__be32 status = 0;
 	u32 tmp;
@@ -403,8 +403,9 @@ static __be32 decode_rc_list(struct xdr_stream *xdr,
 
 static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
 					struct xdr_stream *xdr,
-					struct cb_sequenceargs *args)
+					void *argp)
 {
+	struct cb_sequenceargs *args = argp;
 	__be32 *p;
 	int i;
 	__be32 status;
@@ -450,8 +451,9 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
 
 static __be32 decode_recallany_args(struct svc_rqst *rqstp,
 				      struct xdr_stream *xdr,
-				      struct cb_recallanyargs *args)
+				      void *argp)
 {
+	struct cb_recallanyargs *args = argp;
 	uint32_t bitmap[2];
 	__be32 *p, status;
 
@@ -469,8 +471,9 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp,
 
 static __be32 decode_recallslot_args(struct svc_rqst *rqstp,
 					struct xdr_stream *xdr,
-					struct cb_recallslotargs *args)
+					void *argp)
 {
+	struct cb_recallslotargs *args = argp;
 	__be32 *p;
 
 	p = read_buf(xdr, 4);
@@ -510,8 +513,10 @@ static __be32 decode_lockowner(struct xdr_stream *xdr, struct cb_notify_lock_arg
 	return 0;
 }
 
-static __be32 decode_notify_lock_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_notify_lock_args *args)
+static __be32 decode_notify_lock_args(struct svc_rqst *rqstp,
+		struct xdr_stream *xdr, void *argp)
 {
+	struct cb_notify_lock_args *args = argp;
 	__be32 status;
 
 	status = decode_fh(xdr, &args->cbnl_fh);
@@ -641,8 +646,10 @@ static __be32 encode_op_hdr(struct xdr_stream *xdr, uint32_t op, __be32 res)
 	return 0;
 }
 
-static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
+static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+		const void *resp)
 {
+	const struct cb_getattrres *res = resp;
 	__be32 *savep = NULL;
 	__be32 status = res->status;
 	
@@ -683,8 +690,9 @@ static __be32 encode_sessionid(struct xdr_stream *xdr,
 
 static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
 				       struct xdr_stream *xdr,
-				       const struct cb_sequenceres *res)
+				       const void *resp)
 {
+	const struct cb_sequenceres *res = resp;
 	__be32 *p;
 	__be32 status = res->csr_status;
 
@@ -871,7 +879,7 @@ static __be32 process_op(int nop, struct svc_rqst *rqstp,
 /*
  * Decode, process and encode a COMPOUND
  */
-static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp)
+static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
 {
 	struct cb_compound_hdr_arg hdr_arg = { 0 };
 	struct cb_compound_hdr_res hdr_res = { NULL };
@@ -907,7 +915,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 
 	while (status == 0 && nops != hdr_arg.nops) {
 		status = process_op(nops, rqstp, &xdr_in,
-				    argp, &xdr_out, resp, &cps);
+				    rqstp->rq_argp, &xdr_out, rqstp->rq_resp,
+				    &cps);
 		nops++;
 	}
 
@@ -937,48 +946,46 @@ static struct callback_op callback_ops[] = {
 		.res_maxsize = CB_OP_HDR_RES_MAXSZ,
 	},
 	[OP_CB_GETATTR] = {
-		.process_op = (callback_process_op_t)nfs4_callback_getattr,
-		.decode_args = (callback_decode_arg_t)decode_getattr_args,
-		.encode_res = (callback_encode_res_t)encode_getattr_res,
+		.process_op = nfs4_callback_getattr,
+		.decode_args = decode_getattr_args,
+		.encode_res = encode_getattr_res,
 		.res_maxsize = CB_OP_GETATTR_RES_MAXSZ,
 	},
 	[OP_CB_RECALL] = {
-		.process_op = (callback_process_op_t)nfs4_callback_recall,
-		.decode_args = (callback_decode_arg_t)decode_recall_args,
+		.process_op = nfs4_callback_recall,
+		.decode_args = decode_recall_args,
 		.res_maxsize = CB_OP_RECALL_RES_MAXSZ,
 	},
 #if defined(CONFIG_NFS_V4_1)
 	[OP_CB_LAYOUTRECALL] = {
-		.process_op = (callback_process_op_t)nfs4_callback_layoutrecall,
-		.decode_args =
-			(callback_decode_arg_t)decode_layoutrecall_args,
+		.process_op = nfs4_callback_layoutrecall,
+		.decode_args = decode_layoutrecall_args,
 		.res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
 	},
 	[OP_CB_NOTIFY_DEVICEID] = {
-		.process_op = (callback_process_op_t)nfs4_callback_devicenotify,
-		.decode_args =
-			(callback_decode_arg_t)decode_devicenotify_args,
+		.process_op = nfs4_callback_devicenotify,
+		.decode_args = decode_devicenotify_args,
 		.res_maxsize = CB_OP_DEVICENOTIFY_RES_MAXSZ,
 	},
 	[OP_CB_SEQUENCE] = {
-		.process_op = (callback_process_op_t)nfs4_callback_sequence,
-		.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
-		.encode_res = (callback_encode_res_t)encode_cb_sequence_res,
+		.process_op = nfs4_callback_sequence,
+		.decode_args = decode_cb_sequence_args,
+		.encode_res = encode_cb_sequence_res,
 		.res_maxsize = CB_OP_SEQUENCE_RES_MAXSZ,
 	},
 	[OP_CB_RECALL_ANY] = {
-		.process_op = (callback_process_op_t)nfs4_callback_recallany,
-		.decode_args = (callback_decode_arg_t)decode_recallany_args,
+		.process_op = nfs4_callback_recallany,
+		.decode_args = decode_recallany_args,
 		.res_maxsize = CB_OP_RECALLANY_RES_MAXSZ,
 	},
 	[OP_CB_RECALL_SLOT] = {
-		.process_op = (callback_process_op_t)nfs4_callback_recallslot,
-		.decode_args = (callback_decode_arg_t)decode_recallslot_args,
+		.process_op = nfs4_callback_recallslot,
+		.decode_args = decode_recallslot_args,
 		.res_maxsize = CB_OP_RECALLSLOT_RES_MAXSZ,
 	},
 	[OP_CB_NOTIFY_LOCK] = {
-		.process_op = (callback_process_op_t)nfs4_callback_notify_lock,
-		.decode_args = (callback_decode_arg_t)decode_notify_lock_args,
+		.process_op = nfs4_callback_notify_lock,
+		.decode_args = decode_notify_lock_args,
 		.res_maxsize = CB_OP_NOTIFY_LOCK_RES_MAXSZ,
 	},
 #endif /* CONFIG_NFS_V4_1 */
@@ -987,36 +994,40 @@ static struct callback_op callback_ops[] = {
 /*
  * Define NFS4 callback procedures
  */
-static struct svc_procedure nfs4_callback_procedures1[] = {
+static const struct svc_procedure nfs4_callback_procedures1[] = {
 	[CB_NULL] = {
 		.pc_func = nfs4_callback_null,
-		.pc_decode = (kxdrproc_t)nfs4_decode_void,
-		.pc_encode = (kxdrproc_t)nfs4_encode_void,
+		.pc_decode = nfs4_decode_void,
+		.pc_encode = nfs4_encode_void,
 		.pc_xdrressize = 1,
 	},
 	[CB_COMPOUND] = {
 		.pc_func = nfs4_callback_compound,
-		.pc_encode = (kxdrproc_t)nfs4_encode_void,
+		.pc_encode = nfs4_encode_void,
 		.pc_argsize = 256,
 		.pc_ressize = 256,
 		.pc_xdrressize = NFS4_CALLBACK_BUFSIZE,
 	}
 };
 
-struct svc_version nfs4_callback_version1 = {
+static unsigned int nfs4_callback_count1[ARRAY_SIZE(nfs4_callback_procedures1)];
+const struct svc_version nfs4_callback_version1 = {
 	.vs_vers = 1,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
+	.vs_count = nfs4_callback_count1,
 	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
 	.vs_dispatch = NULL,
 	.vs_hidden = true,
 	.vs_need_cong_ctrl = true,
 };
 
-struct svc_version nfs4_callback_version4 = {
+static unsigned int nfs4_callback_count4[ARRAY_SIZE(nfs4_callback_procedures1)];
+const struct svc_version nfs4_callback_version4 = {
 	.vs_vers = 4,
 	.vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1),
 	.vs_proc = nfs4_callback_procedures1,
+	.vs_count = nfs4_callback_count4,
 	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
 	.vs_dispatch = NULL,
 	.vs_hidden = true,

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2ac00bf..5ac484f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c

@@ -151,7 +151,7 @@ struct nfs_cache_array {
 	struct nfs_cache_array_entry array[0];
 };
 
-typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int);
+typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool);
 typedef struct {
 	struct file	*file;
 	struct page	*page;
@@ -165,8 +165,8 @@ typedef struct {
 	unsigned long	timestamp;
 	unsigned long	gencount;
 	unsigned int	cache_entry_index;
-	unsigned int	plus:1;
-	unsigned int	eof:1;
+	bool plus;
+	bool eof;
 } nfs_readdir_descriptor_t;
 
 /*
@@ -355,7 +355,7 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
 		if (error == -ENOTSUPP && desc->plus) {
 			NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
 			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
-			desc->plus = 0;
+			desc->plus = false;
 			goto again;
 		}
 		goto error;
@@ -557,7 +557,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 
 		count++;
 
-		if (desc->plus != 0)
+		if (desc->plus)
 			nfs_prime_dcache(file_dentry(desc->file), entry);
 
 		status = nfs_readdir_add_to_array(entry, page);
@@ -860,7 +860,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	desc->ctx = ctx;
 	desc->dir_cookie = &dir_ctx->dir_cookie;
 	desc->decode = NFS_PROTO(inode)->decode_dirent;
-	desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
+	desc->plus = nfs_use_readdirplus(inode, ctx);
 
 	if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
 		res = nfs_revalidate_mapping(inode, file->f_mapping);
@@ -885,8 +885,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
 			nfs_zap_caches(inode);
 			desc->page_index = 0;
-			desc->plus = 0;
-			desc->eof = 0;
+			desc->plus = false;
+			desc->eof = false;
 			continue;
 		}
 		if (res < 0)
@@ -1115,11 +1115,13 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 	/* Force a full look up iff the parent directory has changed */
 	if (!nfs_is_exclusive_create(dir, flags) &&
 	    nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
-
-		if (nfs_lookup_verify_inode(inode, flags)) {
+		error = nfs_lookup_verify_inode(inode, flags);
+		if (error) {
 			if (flags & LOOKUP_RCU)
 				return -ECHILD;
-			goto out_zap_parent;
+			if (error == -ESTALE)
+				goto out_zap_parent;
+			goto out_error;
 		}
 		nfs_advise_use_readdirplus(dir);
 		goto out_valid;
@@ -1144,8 +1146,10 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 	trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
 	trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
-	if (error)
+	if (error == -ESTALE || error == -ENOENT)
 		goto out_bad;
+	if (error)
+		goto out_error;
 	if (nfs_compare_fh(NFS_FH(inode), fhandle))
 		goto out_bad;
 	if ((error = nfs_refresh_inode(inode, fattr)) != 0)
@@ -1427,8 +1431,10 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
 	err = finish_open(file, dentry, do_open, opened);
 	if (err)
 		goto out;
-	nfs_file_set_open_context(file, ctx);
-
+	if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
+		nfs_file_set_open_context(file, ctx);
+	else
+		err = -ESTALE;
 out:
 	return err;
 }
@@ -1512,7 +1518,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		d_drop(dentry);
 		switch (err) {
 		case -ENOENT:
-			d_add(dentry, NULL);
+			d_splice_alias(NULL, dentry);
 			nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 			break;
 		case -EISDIR:
@@ -2035,7 +2041,11 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	}
 
 	error = rpc_wait_for_completion_task(task);
-	if (error == 0)
+	if (error != 0) {
+		((struct nfs_renamedata *)task->tk_calldata)->cancelled = 1;
+		/* Paired with the atomic_dec_and_test() barrier in rpc_do_put_task() */
+		smp_wmb();
+	} else
 		error = task->tk_status;
 	rpc_put_task(task);
 	nfs_mark_for_revalidate(old_inode);

diff --git a/fs/nfs/export.c b/fs/nfs/export.c
new file mode 100644
index 0000000..249cb96
--- /dev/null
+++ b/fs/nfs/export.c

@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2015, Primary Data, Inc. All rights reserved.
+ *
+ * Tao Peng <bergwolf@primarydata.com>
+ */
+#include <linux/dcache.h>
+#include <linux/exportfs.h>
+#include <linux/nfs.h>
+#include <linux/nfs_fs.h>
+
+#include "internal.h"
+#include "nfstrace.h"
+
+#define NFSDBG_FACILITY		NFSDBG_VFS
+
+enum {
+	FILEID_HIGH_OFF = 0,	/* fileid, upper 32 bits */
+	FILEID_LOW_OFF,		/* fileid, lower 32 bits */
+	FILE_I_TYPE_OFF,	/* inode type (i_mode & S_IFMT) */
+	EMBED_FH_OFF		/* embedded server fh starts here */
+};
+
+/* Locate the server fh embedded in the handle words at @p. */
+static struct nfs_fh *nfs_exp_embedfh(__u32 *p)
+{
+	return (struct nfs_fh *)(p + EMBED_FH_OFF);
+}
+
+/*
+ * Encode @inode into @p as fileid (high, low), file type and a copy of the
+ * server fh.  *max_len is the room in 32-bit words on entry and the words
+ * needed on return; FILEID_INVALID is returned if @p is too small or @inode
+ * is an automount point, otherwise the encoded length.
+ *
+ * Let's break subtree checking for now... otherwise we'll have to embed parent fh
+ * but there might not be enough space.
+ */
+static int
+nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent)
+{
+	struct nfs_fh *server_fh = NFS_FH(inode);
+	struct nfs_fh *clnt_fh = nfs_exp_embedfh(p);
+	size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size;
+	int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size);
+
+	dprintk("%s: max fh len %d inode %p parent %p\n",
+		__func__, *max_len, inode, parent);
+
+	if (*max_len < len || IS_AUTOMOUNT(inode)) {
+		dprintk("%s: fh len %d too small, required %d\n",
+			__func__, *max_len, len);
+		*max_len = len;
+		return FILEID_INVALID;
+	}
+
+	p[FILEID_HIGH_OFF] = NFS_FILEID(inode) >> 32;
+	p[FILEID_LOW_OFF] = NFS_FILEID(inode);
+	p[FILE_I_TYPE_OFF] = inode->i_mode & S_IFMT;
+	p[len - 1] = 0; /* zero the last word before the fh copy fills it */
+	nfs_copy_fh(clnt_fh, server_fh);
+	*max_len = len;
+	dprintk("%s: result fh fileid %llu mode %u size %d\n",
+		__func__, NFS_FILEID(inode), inode->i_mode, *max_len);
+	return *max_len;
+}
+
+/*
+ * Decode a handle built by nfs_encode_fh(); a NULL return translates to ESTALE.
+ */
+static struct dentry *
+nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+		 int fh_len, int fh_type)
+{
+	struct nfs4_label *label = NULL;
+	struct nfs_fattr *fattr;
+	struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw);
+	const struct nfs_rpc_ops *rpc_ops;
+	struct dentry *dentry;
+	struct inode *inode;
+	u32 *p = fid->raw;
+	size_t fh_size;
+	int len, ret;
+
+	/* The handle must reach the embedded fh before ->size may be read */
+	if (fh_len <= EMBED_FH_OFF)
+		return NULL;
+	fh_size = offsetof(struct nfs_fh, data) + server_fh->size;
+	len = EMBED_FH_OFF + XDR_QUADLEN(fh_size);
+	if (fh_len < len || fh_type != len)
+		return NULL;
+
+	fattr = nfs_alloc_fattr();
+	if (fattr == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	fattr->fileid = ((u64)p[FILEID_HIGH_OFF] << 32) + p[FILEID_LOW_OFF];
+	fattr->mode = p[FILE_I_TYPE_OFF];
+	fattr->valid |= NFS_ATTR_FATTR_FILEID | NFS_ATTR_FATTR_TYPE;
+	dprintk("%s: fileid %llu mode %d\n", __func__, fattr->fileid, fattr->mode);
+
+	inode = nfs_ilookup(sb, fattr, server_fh);
+	if (inode)
+		goto out_found;
+
+	label = nfs4_label_alloc(NFS_SB(sb), GFP_KERNEL);
+	if (IS_ERR(label)) {
+		dentry = ERR_CAST(label);
+		goto out_free_fattr;
+	}
+
+	rpc_ops = NFS_SB(sb)->nfs_client->rpc_ops;
+	ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label);
+	if (ret) {
+		dprintk("%s: getattr failed %d\n", __func__, ret);
+		dentry = ERR_PTR(ret);
+		goto out_free_label;
+	}
+
+	inode = nfs_fhget(sb, server_fh, fattr, label);
+out_found:
+	dentry = d_obtain_alias(inode);
+out_free_label:
+	nfs4_label_free(label);
+out_free_fattr:
+	nfs_free_fattr(fattr);
+	return dentry;
+}
+
+/*
+ * Find the parent of @dentry through the rpc_ops ->lookupp operation.
+ * Returns ERR_PTR(-EACCES) when the rpc_ops table has no ->lookupp.
+ */
+static struct dentry *
+nfs_get_parent(struct dentry *dentry)
+{
+	struct inode *child = d_inode(dentry), *parent_inode;
+	struct super_block *sb = child->i_sb;
+	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_rpc_ops const *ops = server->nfs_client->rpc_ops;
+	struct nfs4_label *label;
+	struct nfs_fattr *fattr;
+	struct dentry *parent;
+	struct nfs_fh parent_fh;
+	int status;
+
+	if (!ops->lookupp)
+		return ERR_PTR(-EACCES);
+
+	fattr = nfs_alloc_fattr();
+	if (fattr == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	label = nfs4_label_alloc(server, GFP_KERNEL);
+	if (IS_ERR(label)) {
+		parent = ERR_CAST(label);
+		goto out_free_fattr;
+	}
+
+	status = ops->lookupp(child, &parent_fh, fattr, label);
+	if (status) {
+		parent = ERR_PTR(status);
+	} else {
+		parent_inode = nfs_fhget(sb, &parent_fh, fattr, label);
+		parent = d_obtain_alias(parent_inode);
+	}
+
+	nfs4_label_free(label);
+out_free_fattr:
+	nfs_free_fattr(fattr);
+	return parent;
+}
+
+const struct export_operations nfs_export_ops = {
+	.encode_fh = nfs_encode_fh,		/* inode -> handle words */
+	.fh_to_dentry = nfs_fh_to_dentry,	/* handle words -> dentry */
+	.get_parent = nfs_get_parent,		/* dentry -> parent dentry */
+};

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 1cf85d6..080fc6b 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c

@@ -126,32 +126,13 @@ static int filelayout_async_handle_error(struct rpc_task *task,
 {
 	struct pnfs_layout_hdr *lo = lseg->pls_layout;
 	struct inode *inode = lo->plh_inode;
-	struct nfs_server *mds_server = NFS_SERVER(inode);
 	struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
-	struct nfs_client *mds_client = mds_server->nfs_client;
 	struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
 
 	if (task->tk_status >= 0)
 		return 0;
 
 	switch (task->tk_status) {
-	/* MDS state errors */
-	case -NFS4ERR_DELEG_REVOKED:
-	case -NFS4ERR_ADMIN_REVOKED:
-	case -NFS4ERR_BAD_STATEID:
-	case -NFS4ERR_OPENMODE:
-		if (state == NULL)
-			break;
-		if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
-			goto out_bad_stateid;
-		goto wait_on_recovery;
-	case -NFS4ERR_EXPIRED:
-		if (state != NULL) {
-			if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
-				goto out_bad_stateid;
-		}
-		nfs4_schedule_lease_recovery(mds_client);
-		goto wait_on_recovery;
 	/* DS session errors */
 	case -NFS4ERR_BADSESSION:
 	case -NFS4ERR_BADSLOT:
@@ -172,6 +153,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
 	case -NFS4ERR_RETRY_UNCACHED_REP:
 		break;
 	/* Invalidate Layout errors */
+	case -NFS4ERR_ACCESS:
 	case -NFS4ERR_PNFS_NO_LAYOUT:
 	case -ESTALE:           /* mapped NFS4ERR_STALE */
 	case -EBADHANDLE:       /* mapped NFS4ERR_BADHANDLE */
@@ -202,26 +184,17 @@ static int filelayout_async_handle_error(struct rpc_task *task,
 			task->tk_status);
 		nfs4_mark_deviceid_unavailable(devid);
 		pnfs_error_mark_layout_for_return(inode, lseg);
+		pnfs_set_lo_fail(lseg);
 		rpc_wake_up(&tbl->slot_tbl_waitq);
 		/* fall through */
 	default:
-		pnfs_set_lo_fail(lseg);
 reset:
 		dprintk("%s Retry through MDS. Error %d\n", __func__,
 			task->tk_status);
 		return -NFS4ERR_RESET_TO_MDS;
 	}
-out:
 	task->tk_status = 0;
 	return -EAGAIN;
-out_bad_stateid:
-	task->tk_status = -EIO;
-	return 0;
-wait_on_recovery:
-	rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
-	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
-		rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
-	goto out;
 }
 
 /* NFS_PROTO call done callback routines */

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 23542dc..1f2ac3d 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c

@@ -1050,34 +1050,10 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
 {
 	struct pnfs_layout_hdr *lo = lseg->pls_layout;
 	struct inode *inode = lo->plh_inode;
-	struct nfs_server *mds_server = NFS_SERVER(inode);
-
 	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
-	struct nfs_client *mds_client = mds_server->nfs_client;
 	struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
 
 	switch (task->tk_status) {
-	/* MDS state errors */
-	case -NFS4ERR_DELEG_REVOKED:
-	case -NFS4ERR_ADMIN_REVOKED:
-	case -NFS4ERR_BAD_STATEID:
-		if (state == NULL)
-			break;
-		nfs_remove_bad_delegation(state->inode, NULL);
-	case -NFS4ERR_OPENMODE:
-		if (state == NULL)
-			break;
-		if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
-			goto out_bad_stateid;
-		goto wait_on_recovery;
-	case -NFS4ERR_EXPIRED:
-		if (state != NULL) {
-			if (nfs4_schedule_stateid_recovery(mds_server, state) < 0)
-				goto out_bad_stateid;
-		}
-		nfs4_schedule_lease_recovery(mds_client);
-		goto wait_on_recovery;
-	/* DS session errors */
 	case -NFS4ERR_BADSESSION:
 	case -NFS4ERR_BADSLOT:
 	case -NFS4ERR_BAD_HIGH_SLOT:
@@ -1137,17 +1113,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
 			task->tk_status);
 		return -NFS4ERR_RESET_TO_MDS;
 	}
-out:
 	task->tk_status = 0;
 	return -EAGAIN;
-out_bad_stateid:
-	task->tk_status = -EIO;
-	return 0;
-wait_on_recovery:
-	rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL);
-	if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0)
-		rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task);
-	goto out;
 }
 
 /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1de93ba..109279d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c

@@ -386,6 +386,28 @@ void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
 #endif
 EXPORT_SYMBOL_GPL(nfs_setsecurity);
 
+/*
+ * Search the inode cache for the inode identified by fh, fileid and i_mode.
+ * Returns NULL without searching when @fattr carries no fileid or no type;
+ * otherwise returns the result of ilookup5().
+ */
+struct inode *
+nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
+{
+	struct nfs_find_desc key = { .fh = fh, .fattr = fattr };
+	struct inode *found;
+
+	/* Both the fileid and the file type are needed for the lookup */
+	if (!(fattr->valid & NFS_ATTR_FATTR_FILEID))
+		return NULL;
+	if (!(fattr->valid & NFS_ATTR_FATTR_TYPE))
+		return NULL;
+
+	found = ilookup5(sb, nfs_fattr_to_ino_t(fattr), nfs_find_actor, &key);
+	dprintk("%s: returning %p\n", __func__, found);
+	return found;
+}
+
 /*
  * This is our front-end to iget that looks up inodes by file handle
  * instead of inode number.
@@ -525,8 +547,14 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
 		nfs_fscache_init_inode(inode);
 
 		unlock_new_inode(inode);
-	} else
-		nfs_refresh_inode(inode, fattr);
+	} else {
+		int err = nfs_refresh_inode(inode, fattr);
+		if (err < 0) {
+			iput(inode);
+			inode = ERR_PTR(err);
+			goto out_no_inode;
+		}
+	}
 	dprintk("NFS: nfs_fhget(%s/%Lu fh_crc=0x%08x ct=%d)\n",
 		inode->i_sb->s_id,
 		(unsigned long long)NFS_FILEID(inode),
@@ -1315,9 +1343,9 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 		return 0;
 	/* Has the inode gone and changed behind our back? */
 	if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
-		return -EIO;
+		return -ESTALE;
 	if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
-		return -EIO;
+		return -ESTALE;
 
 	if (!nfs_file_has_buffered_writers(nfsi)) {
 		/* Verify a few of the more important attributes */

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 8701d76..dc45641 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h

@@ -11,6 +11,8 @@
 
 #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
 
+extern const struct export_operations nfs_export_ops;
+
 struct nfs_string;
 
 /* Maximum number of readahead requests
@@ -226,8 +228,8 @@ static inline void nfs_fs_proc_exit(void)
 #endif
 
 /* callback_xdr.c */
-extern struct svc_version nfs4_callback_version1;
-extern struct svc_version nfs4_callback_version4;
+extern const struct svc_version nfs4_callback_version1;
+extern const struct svc_version nfs4_callback_version4;
 
 struct nfs_pageio_descriptor;
 /* pagelist.c */
@@ -271,19 +273,19 @@ static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
 }
 
 /* nfs2xdr.c */
-extern struct rpc_procinfo nfs_procedures[];
+extern const struct rpc_procinfo nfs_procedures[];
 extern int nfs2_decode_dirent(struct xdr_stream *,
-				struct nfs_entry *, int);
+				struct nfs_entry *, bool);
 
 /* nfs3xdr.c */
-extern struct rpc_procinfo nfs3_procedures[];
+extern const struct rpc_procinfo nfs3_procedures[];
 extern int nfs3_decode_dirent(struct xdr_stream *,
-				struct nfs_entry *, int);
+				struct nfs_entry *, bool);
 
 /* nfs4xdr.c */
 #if IS_ENABLED(CONFIG_NFS_V4)
 extern int nfs4_decode_dirent(struct xdr_stream *,
-				struct nfs_entry *, int);
+				struct nfs_entry *, bool);
 #endif
 #ifdef CONFIG_NFS_V4_1
 extern const u32 nfs41_maxread_overhead;
@@ -293,7 +295,7 @@ extern const u32 nfs41_maxgetdevinfo_overhead;
 
 /* nfs4proc.c */
 #if IS_ENABLED(CONFIG_NFS_V4)
-extern struct rpc_procinfo nfs4_procedures[];
+extern const struct rpc_procinfo nfs4_procedures[];
 #endif
 
 #ifdef CONFIG_NFS_V4_SECURITY_LABEL

diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 09b1900..3efe946 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c

@@ -304,7 +304,7 @@ static void encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
 }
 
 static void mnt_xdr_enc_dirpath(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const char *dirpath)
+				const void *dirpath)
 {
 	encode_mntdirpath(xdr, dirpath);
 }
@@ -357,8 +357,9 @@ static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res)
 
 static int mnt_xdr_dec_mountres(struct rpc_rqst *req,
 				struct xdr_stream *xdr,
-				struct mountres *res)
+				void *data)
 {
+	struct mountres *res = data;
 	int status;
 
 	status = decode_status(xdr, res);
@@ -449,8 +450,9 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
 
 static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 struct mountres *res)
+				 void *data)
 {
+	struct mountres *res = data;
 	int status;
 
 	status = decode_fhs_status(xdr, res);
@@ -464,11 +466,11 @@ static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
 	return decode_auth_flavors(xdr, res);
 }
 
-static struct rpc_procinfo mnt_procedures[] = {
+static const struct rpc_procinfo mnt_procedures[] = {
 	[MOUNTPROC_MNT] = {
 		.p_proc		= MOUNTPROC_MNT,
-		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
-		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres,
+		.p_encode	= mnt_xdr_enc_dirpath,
+		.p_decode	= mnt_xdr_dec_mountres,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres_sz,
 		.p_statidx	= MOUNTPROC_MNT,
@@ -476,18 +478,18 @@ static struct rpc_procinfo mnt_procedures[] = {
 	},
 	[MOUNTPROC_UMNT] = {
 		.p_proc		= MOUNTPROC_UMNT,
-		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
+		.p_encode	= mnt_xdr_enc_dirpath,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_statidx	= MOUNTPROC_UMNT,
 		.p_name		= "UMOUNT",
 	},
 };
 
-static struct rpc_procinfo mnt3_procedures[] = {
+static const struct rpc_procinfo mnt3_procedures[] = {
 	[MOUNTPROC3_MNT] = {
 		.p_proc		= MOUNTPROC3_MNT,
-		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
-		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres3,
+		.p_encode	= mnt_xdr_enc_dirpath,
+		.p_decode	= mnt_xdr_dec_mountres3,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres3_sz,
 		.p_statidx	= MOUNTPROC3_MNT,
@@ -495,24 +497,27 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	},
 	[MOUNTPROC3_UMNT] = {
 		.p_proc		= MOUNTPROC3_UMNT,
-		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
+		.p_encode	= mnt_xdr_enc_dirpath,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_statidx	= MOUNTPROC3_UMNT,
 		.p_name		= "UMOUNT",
 	},
 };
 
-
+static unsigned int mnt_counts[ARRAY_SIZE(mnt_procedures)];
 static const struct rpc_version mnt_version1 = {
 	.number		= 1,
 	.nrprocs	= ARRAY_SIZE(mnt_procedures),
 	.procs		= mnt_procedures,
+	.counts		= mnt_counts,
 };
 
+static unsigned int mnt3_counts[ARRAY_SIZE(mnt3_procedures)];
 static const struct rpc_version mnt_version3 = {
 	.number		= 3,
 	.nrprocs	= ARRAY_SIZE(mnt3_procedures),
 	.procs		= mnt3_procedures,
+	.counts		= mnt3_counts,	/* sized to match .procs */
 };
 
 static const struct rpc_version *mnt_version[] = {

diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index b4e03ed..fe68dab 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c

@@ -568,8 +568,10 @@ static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
 
 static void nfs2_xdr_enc_fhandle(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 const struct nfs_fh *fh)
+				 const void *data)
 {
+	const struct nfs_fh *fh = data;
+
 	encode_fhandle(xdr, fh);
 }
 
@@ -583,23 +585,29 @@ static void nfs2_xdr_enc_fhandle(struct rpc_rqst *req,
  */
 static void nfs2_xdr_enc_sattrargs(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_sattrargs *args)
+				   const void *data)
 {
+	const struct nfs_sattrargs *args = data;
+
 	encode_fhandle(xdr, args->fh);
 	encode_sattr(xdr, args->sattr);
 }
 
 static void nfs2_xdr_enc_diropargs(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_diropargs *args)
+				   const void *data)
 {
+	const struct nfs_diropargs *args = data;
+
 	encode_diropargs(xdr, args->fh, args->name, args->len);
 }
 
 static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      const struct nfs_readlinkargs *args)
+				      const void *data)
 {
+	const struct nfs_readlinkargs *args = data;
+
 	encode_fhandle(xdr, args->fh);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->pglen, NFS_readlinkres_sz);
@@ -632,8 +640,10 @@ static void encode_readargs(struct xdr_stream *xdr,
 
 static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nfs_pgio_args *args)
+				  const void *data)
 {
+	const struct nfs_pgio_args *args = data;
+
 	encode_readargs(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->count, NFS_readres_sz);
@@ -672,8 +682,10 @@ static void encode_writeargs(struct xdr_stream *xdr,
 
 static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_pgio_args *args)
+				   const void *data)
 {
+	const struct nfs_pgio_args *args = data;
+
 	encode_writeargs(xdr, args);
 	xdr->buf->flags |= XDRBUF_WRITE;
 }
@@ -688,16 +700,20 @@ static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
  */
 static void nfs2_xdr_enc_createargs(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs_createargs *args)
+				    const void *data)
 {
+	const struct nfs_createargs *args = data;
+
 	encode_diropargs(xdr, args->fh, args->name, args->len);
 	encode_sattr(xdr, args->sattr);
 }
 
 static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs_removeargs *args)
+				    const void *data)
 {
+	const struct nfs_removeargs *args = data;
+
 	encode_diropargs(xdr, args->fh, args->name.name, args->name.len);
 }
 
@@ -711,8 +727,9 @@ static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
  */
 static void nfs2_xdr_enc_renameargs(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs_renameargs *args)
+				    const void *data)
 {
+	const struct nfs_renameargs *args = data;
 	const struct qstr *old = args->old_name;
 	const struct qstr *new = args->new_name;
 
@@ -730,8 +747,10 @@ static void nfs2_xdr_enc_renameargs(struct rpc_rqst *req,
  */
 static void nfs2_xdr_enc_linkargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nfs_linkargs *args)
+				  const void *data)
 {
+	const struct nfs_linkargs *args = data;
+
 	encode_fhandle(xdr, args->fromfh);
 	encode_diropargs(xdr, args->tofh, args->toname, args->tolen);
 }
@@ -747,8 +766,10 @@ static void nfs2_xdr_enc_linkargs(struct rpc_rqst *req,
  */
 static void nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_symlinkargs *args)
+				     const void *data)
 {
+	const struct nfs_symlinkargs *args = data;
+
 	encode_diropargs(xdr, args->fromfh, args->fromname, args->fromlen);
 	encode_path(xdr, args->pages, args->pathlen);
 	encode_sattr(xdr, args->sattr);
@@ -777,8 +798,10 @@ static void encode_readdirargs(struct xdr_stream *xdr,
 
 static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_readdirargs *args)
+				     const void *data)
 {
+	const struct nfs_readdirargs *args = data;
+
 	encode_readdirargs(xdr, args);
 	prepare_reply_buffer(req, args->pages, 0,
 					args->count, NFS_readdirres_sz);
@@ -809,13 +832,13 @@ static int nfs2_xdr_dec_stat(struct rpc_rqst *req, struct xdr_stream *xdr,
 }
 
 static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_fattr *result)
+				 void *result)
 {
 	return decode_attrstat(xdr, result, NULL);
 }
 
 static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_diropok *result)
+				 void *result)
 {
 	return decode_diropres(xdr, result);
 }
@@ -860,8 +883,9 @@ static int nfs2_xdr_dec_readlinkres(struct rpc_rqst *req,
  *	};
  */
 static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_pgio_res *result)
+				void *data)
 {
+	struct nfs_pgio_res *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -882,8 +906,10 @@ static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
 }
 
 static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_pgio_res *result)
+				 void *data)
 {
+	struct nfs_pgio_res *result = data;
+
 	/* All NFSv2 writes are "file sync" writes */
 	result->verf->committed = NFS_FILE_SYNC;
 	return decode_attrstat(xdr, result->fattr, &result->op_status);
@@ -913,7 +939,7 @@ static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
  *	};
  */
 int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
-		       int plus)
+		       bool plus)
 {
 	__be32 *p;
 	int error;
@@ -1034,7 +1060,7 @@ static int decode_info(struct xdr_stream *xdr, struct nfs2_fsstat *result)
 }
 
 static int nfs2_xdr_dec_statfsres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  struct nfs2_fsstat *result)
+				  void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -1118,15 +1144,15 @@ static int nfs_stat_to_errno(enum nfs_stat status)
 #define PROC(proc, argtype, restype, timer)				\
 [NFSPROC_##proc] = {							\
 	.p_proc	    =  NFSPROC_##proc,					\
-	.p_encode   =  (kxdreproc_t)nfs2_xdr_enc_##argtype,		\
-	.p_decode   =  (kxdrdproc_t)nfs2_xdr_dec_##restype,		\
+	.p_encode   =  nfs2_xdr_enc_##argtype,				\
+	.p_decode   =  nfs2_xdr_dec_##restype,				\
 	.p_arglen   =  NFS_##argtype##_sz,				\
 	.p_replen   =  NFS_##restype##_sz,				\
 	.p_timer    =  timer,						\
 	.p_statidx  =  NFSPROC_##proc,					\
 	.p_name     =  #proc,						\
 	}
-struct rpc_procinfo	nfs_procedures[] = {
+const struct rpc_procinfo nfs_procedures[] = {
 	PROC(GETATTR,	fhandle,	attrstat,	1),
 	PROC(SETATTR,	sattrargs,	attrstat,	0),
 	PROC(LOOKUP,	diropargs,	diropres,	2),
@@ -1144,8 +1170,10 @@ struct rpc_procinfo	nfs_procedures[] = {
 	PROC(STATFS,	fhandle,	statfsres,	0),
 };
 
+static unsigned int nfs_version2_counts[ARRAY_SIZE(nfs_procedures)];
 const struct rpc_version nfs_version2 = {
 	.number			= 2,
 	.nrprocs		= ARRAY_SIZE(nfs_procedures),
-	.procs			= nfs_procedures
+	.procs			= nfs_procedures,
+	.counts			= nfs_version2_counts,
 };

diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 0c07b56..df4a7d3 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c

@@ -621,7 +621,7 @@ nfs3_proc_rmdir(struct inode *dir, const struct qstr *name)
  */
 static int
 nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
-		  u64 cookie, struct page **pages, unsigned int count, int plus)
+		  u64 cookie, struct page **pages, unsigned int count, bool plus)
 {
 	struct inode		*dir = d_inode(dentry);
 	__be32			*verf = NFS_I(dir)->cookieverf;

diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 267126d..e82c9e5 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c

@@ -846,8 +846,10 @@ static void encode_diropargs3(struct xdr_stream *xdr, const struct nfs_fh *fh,
  */
 static void nfs3_xdr_enc_getattr3args(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      const struct nfs_fh *fh)
+				      const void *data)
 {
+	const struct nfs_fh *fh = data;
+
 	encode_nfs_fh3(xdr, fh);
 }
 
@@ -884,8 +886,9 @@ static void encode_sattrguard3(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      const struct nfs3_sattrargs *args)
+				      const void *data)
 {
+	const struct nfs3_sattrargs *args = data;
 	encode_nfs_fh3(xdr, args->fh);
 	encode_sattr3(xdr, args->sattr);
 	encode_sattrguard3(xdr, args);
@@ -900,8 +903,10 @@ static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_lookup3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs3_diropargs *args)
+				     const void *data)
 {
+	const struct nfs3_diropargs *args = data;
+
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
 }
 
@@ -922,8 +927,10 @@ static void encode_access3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_access3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs3_accessargs *args)
+				     const void *data)
 {
+	const struct nfs3_accessargs *args = data;
+
 	encode_access3args(xdr, args);
 }
 
@@ -936,8 +943,10 @@ static void nfs3_xdr_enc_access3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
 				       struct xdr_stream *xdr,
-				       const struct nfs3_readlinkargs *args)
+				       const void *data)
 {
+	const struct nfs3_readlinkargs *args = data;
+
 	encode_nfs_fh3(xdr, args->fh);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->pglen, NFS3_readlinkres_sz);
@@ -966,8 +975,10 @@ static void encode_read3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_pgio_args *args)
+				   const void *data)
 {
+	const struct nfs_pgio_args *args = data;
+
 	encode_read3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->count, NFS3_readres_sz);
@@ -1008,8 +1019,10 @@ static void encode_write3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs_pgio_args *args)
+				    const void *data)
 {
+	const struct nfs_pgio_args *args = data;
+
 	encode_write3args(xdr, args);
 	xdr->buf->flags |= XDRBUF_WRITE;
 }
@@ -1055,8 +1068,10 @@ static void encode_createhow3(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs3_createargs *args)
+				     const void *data)
 {
+	const struct nfs3_createargs *args = data;
+
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
 	encode_createhow3(xdr, args);
 }
@@ -1071,8 +1086,10 @@ static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs3_mkdirargs *args)
+				    const void *data)
 {
+	const struct nfs3_mkdirargs *args = data;
+
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
 	encode_sattr3(xdr, args->sattr);
 }
@@ -1091,16 +1108,20 @@ static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
  *	};
  */
 static void encode_symlinkdata3(struct xdr_stream *xdr,
-				const struct nfs3_symlinkargs *args)
+				const void *data)
 {
+	const struct nfs3_symlinkargs *args = data;
+
 	encode_sattr3(xdr, args->sattr);
 	encode_nfspath3(xdr, args->pages, args->pathlen);
 }
 
 static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      const struct nfs3_symlinkargs *args)
+				      const void *data)
 {
+	const struct nfs3_symlinkargs *args = data;
+
 	encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
 	encode_symlinkdata3(xdr, args);
 	xdr->buf->flags |= XDRBUF_WRITE;
@@ -1160,8 +1181,10 @@ static void encode_mknoddata3(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs3_mknodargs *args)
+				    const void *data)
 {
+	const struct nfs3_mknodargs *args = data;
+
 	encode_diropargs3(xdr, args->fh, args->name, args->len);
 	encode_mknoddata3(xdr, args);
 }
@@ -1175,8 +1198,10 @@ static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_remove3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_removeargs *args)
+				     const void *data)
 {
+	const struct nfs_removeargs *args = data;
+
 	encode_diropargs3(xdr, args->fh, args->name.name, args->name.len);
 }
 
@@ -1190,8 +1215,9 @@ static void nfs3_xdr_enc_remove3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_rename3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_renameargs *args)
+				     const void *data)
 {
+	const struct nfs_renameargs *args = data;
 	const struct qstr *old = args->old_name;
 	const struct qstr *new = args->new_name;
 
@@ -1209,8 +1235,10 @@ static void nfs3_xdr_enc_rename3args(struct rpc_rqst *req,
  */
 static void nfs3_xdr_enc_link3args(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs3_linkargs *args)
+				   const void *data)
 {
+	const struct nfs3_linkargs *args = data;
+
 	encode_nfs_fh3(xdr, args->fromfh);
 	encode_diropargs3(xdr, args->tofh, args->toname, args->tolen);
 }
@@ -1240,8 +1268,10 @@ static void encode_readdir3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      const struct nfs3_readdirargs *args)
+				      const void *data)
 {
+	const struct nfs3_readdirargs *args = data;
+
 	encode_readdir3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, 0,
 				args->count, NFS3_readdirres_sz);
@@ -1280,8 +1310,10 @@ static void encode_readdirplus3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
 					  struct xdr_stream *xdr,
-					  const struct nfs3_readdirargs *args)
+					  const void *data)
 {
+	const struct nfs3_readdirargs *args = data;
+
 	encode_readdirplus3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, 0,
 				args->count, NFS3_readdirres_sz);
@@ -1310,8 +1342,10 @@ static void encode_commit3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs_commitargs *args)
+				     const void *data)
 {
+	const struct nfs_commitargs *args = data;
+
 	encode_commit3args(xdr, args);
 }
 
@@ -1319,8 +1353,10 @@ static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
 
 static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs3_getaclargs *args)
+				     const void *data)
 {
+	const struct nfs3_getaclargs *args = data;
+
 	encode_nfs_fh3(xdr, args->fh);
 	encode_uint32(xdr, args->mask);
 	if (args->mask & (NFS_ACL | NFS_DFACL))
@@ -1331,8 +1367,9 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
 
 static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs3_setaclargs *args)
+				     const void *data)
 {
+	const struct nfs3_setaclargs *args = data;
 	unsigned int base;
 	int error;
 
@@ -1382,7 +1419,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs_fattr *result)
+				    void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -1419,7 +1456,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs_fattr *result)
+				    void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -1460,8 +1497,9 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs3_diropres *result)
+				   void *data)
 {
+	struct nfs3_diropres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1507,8 +1545,9 @@ static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs3_accessres *result)
+				   void *data)
 {
+	struct nfs3_accessres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1548,7 +1587,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs_fattr *result)
+				     void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -1626,8 +1665,9 @@ static int decode_read3resok(struct xdr_stream *xdr,
 }
 
 static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_pgio_res *result)
+				 void *data)
 {
+	struct nfs_pgio_res *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1699,8 +1739,9 @@ static int decode_write3resok(struct xdr_stream *xdr,
 }
 
 static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  struct nfs_pgio_res *result)
+				  void *data)
 {
+	struct nfs_pgio_res *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1764,8 +1805,9 @@ static int decode_create3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs3_diropres *result)
+				   void *data)
 {
+	struct nfs3_diropres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1804,8 +1846,9 @@ static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_removeres *result)
+				   void *data)
 {
+	struct nfs_removeres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1845,8 +1888,9 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_renameres *result)
+				   void *data)
 {
+	struct nfs_renameres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1888,8 +1932,9 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
  *	};
  */
 static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs3_linkres *result)
+				 void *data)
 {
+	struct nfs3_linkres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -1946,7 +1991,7 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
  *	};
  */
 int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
-		       int plus)
+		       bool plus)
 {
 	struct nfs_entry old = *entry;
 	__be32 *p;
@@ -2072,8 +2117,9 @@ static int decode_readdir3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs3_readdirres *result)
+				    void *data)
 {
+	struct nfs3_readdirres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2140,8 +2186,9 @@ static int decode_fsstat3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_fsstat *result)
+				   void *data)
 {
+	struct nfs_fsstat *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2216,8 +2263,9 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_fsinfo *result)
+				   void *data)
 {
+	struct nfs_fsinfo *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2279,8 +2327,9 @@ static int decode_pathconf3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs_pathconf *result)
+				     void *data)
 {
+	struct nfs_pathconf *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2320,8 +2369,9 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
  */
 static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_commitres *result)
+				   void *data)
 {
+	struct nfs_commitres *result = data;
 	enum nfs_stat status;
 	int error;
 
@@ -2389,7 +2439,7 @@ static inline int decode_getacl3resok(struct xdr_stream *xdr,
 
 static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs3_getaclres *result)
+				   void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -2408,7 +2458,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
 
 static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs_fattr *result)
+				   void *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -2495,8 +2545,8 @@ static int nfs3_stat_to_errno(enum nfs_stat status)
 #define PROC(proc, argtype, restype, timer)				\
 [NFS3PROC_##proc] = {							\
 	.p_proc      = NFS3PROC_##proc,					\
-	.p_encode    = (kxdreproc_t)nfs3_xdr_enc_##argtype##3args,	\
-	.p_decode    = (kxdrdproc_t)nfs3_xdr_dec_##restype##3res,	\
+	.p_encode    = nfs3_xdr_enc_##argtype##3args,			\
+	.p_decode    = nfs3_xdr_dec_##restype##3res,			\
 	.p_arglen    = NFS3_##argtype##args_sz,				\
 	.p_replen    = NFS3_##restype##res_sz,				\
 	.p_timer     = timer,						\
@@ -2504,7 +2554,7 @@ static int nfs3_stat_to_errno(enum nfs_stat status)
 	.p_name      = #proc,						\
 	}
 
-struct rpc_procinfo	nfs3_procedures[] = {
+const struct rpc_procinfo nfs3_procedures[] = {
 	PROC(GETATTR,		getattr,	getattr,	1),
 	PROC(SETATTR,		setattr,	setattr,	0),
 	PROC(LOOKUP,		lookup,		lookup,		2),
@@ -2528,18 +2578,20 @@ struct rpc_procinfo	nfs3_procedures[] = {
 	PROC(COMMIT,		commit,		commit,		5),
 };
 
+static unsigned int nfs_version3_counts[ARRAY_SIZE(nfs3_procedures)];
 const struct rpc_version nfs_version3 = {
 	.number			= 3,
 	.nrprocs		= ARRAY_SIZE(nfs3_procedures),
-	.procs			= nfs3_procedures
+	.procs			= nfs3_procedures,
+	.counts			= nfs_version3_counts,
 };
 
 #ifdef CONFIG_NFS_V3_ACL
-static struct rpc_procinfo	nfs3_acl_procedures[] = {
+static const struct rpc_procinfo nfs3_acl_procedures[] = {
 	[ACLPROC3_GETACL] = {
 		.p_proc = ACLPROC3_GETACL,
-		.p_encode = (kxdreproc_t)nfs3_xdr_enc_getacl3args,
-		.p_decode = (kxdrdproc_t)nfs3_xdr_dec_getacl3res,
+		.p_encode = nfs3_xdr_enc_getacl3args,
+		.p_decode = nfs3_xdr_dec_getacl3res,
 		.p_arglen = ACL3_getaclargs_sz,
 		.p_replen = ACL3_getaclres_sz,
 		.p_timer = 1,
@@ -2547,8 +2599,8 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	},
 	[ACLPROC3_SETACL] = {
 		.p_proc = ACLPROC3_SETACL,
-		.p_encode = (kxdreproc_t)nfs3_xdr_enc_setacl3args,
-		.p_decode = (kxdrdproc_t)nfs3_xdr_dec_setacl3res,
+		.p_encode = nfs3_xdr_enc_setacl3args,
+		.p_decode = nfs3_xdr_dec_setacl3res,
 		.p_arglen = ACL3_setaclargs_sz,
 		.p_replen = ACL3_setaclres_sz,
 		.p_timer = 0,
@@ -2556,10 +2608,11 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	},
 };
 
+static unsigned int nfs3_acl_counts[ARRAY_SIZE(nfs3_acl_procedures)];
 const struct rpc_version nfsacl_version3 = {
 	.number			= 3,
-	.nrprocs		= sizeof(nfs3_acl_procedures)/
-				  sizeof(nfs3_acl_procedures[0]),
+	.nrprocs		= ARRAY_SIZE(nfs3_acl_procedures),
 	.procs			= nfs3_acl_procedures,
+	.counts			= nfs3_acl_counts,
 };
 #endif  /* CONFIG_NFS_V3_ACL */

diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 319a47d..6c2db51 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c

@@ -146,7 +146,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
 	loff_t pos_src = args->src_pos;
 	loff_t pos_dst = args->dst_pos;
 	size_t count = args->count;
-	int status;
+	ssize_t status;
 
 	status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context,
 				     src_lock, FMODE_READ);

diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 528362f..5ee1b0f 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c

@@ -112,7 +112,7 @@
 					 decode_getattr_maxsz)
 
 static void encode_fallocate(struct xdr_stream *xdr,
-			     struct nfs42_falloc_args *args)
+			     const struct nfs42_falloc_args *args)
 {
 	encode_nfs4_stateid(xdr, &args->falloc_stateid);
 	encode_uint64(xdr, args->falloc_offset);
@@ -120,7 +120,7 @@ static void encode_fallocate(struct xdr_stream *xdr,
 }
 
 static void encode_allocate(struct xdr_stream *xdr,
-			    struct nfs42_falloc_args *args,
+			    const struct nfs42_falloc_args *args,
 			    struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_ALLOCATE, decode_allocate_maxsz, hdr);
@@ -128,7 +128,7 @@ static void encode_allocate(struct xdr_stream *xdr,
 }
 
 static void encode_copy(struct xdr_stream *xdr,
-			struct nfs42_copy_args *args,
+			const struct nfs42_copy_args *args,
 			struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_COPY, decode_copy_maxsz, hdr);
@@ -145,7 +145,7 @@ static void encode_copy(struct xdr_stream *xdr,
 }
 
 static void encode_deallocate(struct xdr_stream *xdr,
-			      struct nfs42_falloc_args *args,
+			      const struct nfs42_falloc_args *args,
 			      struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_DEALLOCATE, decode_deallocate_maxsz, hdr);
@@ -153,7 +153,7 @@ static void encode_deallocate(struct xdr_stream *xdr,
 }
 
 static void encode_seek(struct xdr_stream *xdr,
-			struct nfs42_seek_args *args,
+			const struct nfs42_seek_args *args,
 			struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_SEEK, decode_seek_maxsz, hdr);
@@ -163,7 +163,7 @@ static void encode_seek(struct xdr_stream *xdr,
 }
 
 static void encode_layoutstats(struct xdr_stream *xdr,
-			       struct nfs42_layoutstat_args *args,
+			       const struct nfs42_layoutstat_args *args,
 			       struct nfs42_layoutstat_devinfo *devinfo,
 			       struct compound_hdr *hdr)
 {
@@ -191,7 +191,7 @@ static void encode_layoutstats(struct xdr_stream *xdr,
 }
 
 static void encode_clone(struct xdr_stream *xdr,
-			 struct nfs42_clone_args *args,
+			 const struct nfs42_clone_args *args,
 			 struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -210,8 +210,9 @@ static void encode_clone(struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  struct nfs42_falloc_args *args)
+				  const void *data)
 {
+	const struct nfs42_falloc_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -225,7 +226,7 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
 }
 
 static void encode_copy_commit(struct xdr_stream *xdr,
-			  struct nfs42_copy_args *args,
+			  const struct nfs42_copy_args *args,
 			  struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -241,8 +242,9 @@ static void encode_copy_commit(struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
 			      struct xdr_stream *xdr,
-			      struct nfs42_copy_args *args)
+			      const void *data)
 {
+	const struct nfs42_copy_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -262,8 +264,9 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs42_falloc_args *args)
+				    const void *data)
 {
+	const struct nfs42_falloc_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -281,8 +284,9 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
 			      struct xdr_stream *xdr,
-			      struct nfs42_seek_args *args)
+			      const void *data)
 {
+	const struct nfs42_seek_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -299,8 +303,9 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs42_layoutstat_args *args)
+				     const void *data)
 {
+	const struct nfs42_layoutstat_args *args = data;
 	int i;
 
 	struct compound_hdr hdr = {
@@ -321,8 +326,9 @@ static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_clone(struct rpc_rqst *req,
 			       struct xdr_stream *xdr,
-			       struct nfs42_clone_args *args)
+			       const void *data)
 {
+	const struct nfs42_clone_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -448,8 +454,9 @@ static int decode_clone(struct xdr_stream *xdr)
  */
 static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp,
 				 struct xdr_stream *xdr,
-				 struct nfs42_falloc_res *res)
+				 void *data)
 {
+	struct nfs42_falloc_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -475,8 +482,9 @@ static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp,
 			     struct xdr_stream *xdr,
-			     struct nfs42_copy_res *res)
+			     void *data)
 {
+	struct nfs42_copy_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -508,8 +516,9 @@ static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
 				   struct xdr_stream *xdr,
-				   struct nfs42_falloc_res *res)
+				   void *data)
 {
+	struct nfs42_falloc_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -535,8 +544,9 @@ static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
 			     struct xdr_stream *xdr,
-			     struct nfs42_seek_res *res)
+			     void *data)
 {
+	struct nfs42_seek_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -559,8 +569,9 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp,
 				    struct xdr_stream *xdr,
-				    struct nfs42_layoutstat_res *res)
+				    void *data)
 {
+	struct nfs42_layoutstat_res *res = data;
 	struct compound_hdr hdr;
 	int status, i;
 
@@ -589,8 +600,9 @@ static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp,
 			      struct xdr_stream *xdr,
-			      struct nfs42_clone_res *res)
+			      void *data)
 {
+	struct nfs42_clone_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index af285cc..40bd05f 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h

@@ -493,13 +493,13 @@ static inline void nfs4_unregister_sysctl(void)
 #endif
 
 /* nfs4xdr.c */
-extern struct rpc_procinfo nfs4_procedures[];
+extern const struct rpc_procinfo nfs4_procedures[];
 
 struct nfs4_mount_data;
 
 /* callback_xdr.c */
-extern struct svc_version nfs4_callback_version1;
-extern struct svc_version nfs4_callback_version4;
+extern const struct svc_version nfs4_callback_version1;
+extern const struct svc_version nfs4_callback_version4;
 
 static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src)
 {

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 66776f0..50566acb 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c

@@ -414,6 +414,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 	if (clp != old)
 		clp->cl_preserve_clid = true;
 	nfs_put_client(clp);
+	clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
 	return old;
 
 error:
@@ -852,6 +853,8 @@ static int nfs4_set_client(struct nfs_server *server,
 		set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
 	if (server->options & NFS_OPTION_MIGRATION)
 		set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
+	if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
+		set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
 
 	/* Allocate or find a client reference we can use */
 	clp = nfs_get_client(&cl_init);
@@ -1212,9 +1215,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
 		return -EAFNOSUPPORT;
 
 	nfs_server_remove_lists(server);
+	set_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
 	error = nfs4_set_client(server, hostname, sap, salen, buf,
 				clp->cl_proto, clnt->cl_timeout,
 				clp->cl_minorversion, net);
+	clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
 	nfs_put_client(clp);
 	if (error != 0) {
 		nfs_server_insert_lists(server);

diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 835c163..dd5d27d 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c

@@ -364,7 +364,8 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ
 		ret = -EINVAL;
 	} else {
 		ret = kstrtol(id_str, 10, &id_long);
-		*id = (__u32)id_long;
+		if (!ret)
+			*id = (__u32)id_long;
 	}
 	return ret;
 }

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 98b0b66..a0b4e10 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c

@@ -275,6 +275,7 @@ const u32 nfs4_fs_locations_bitmap[3] = {
 static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
 		struct nfs4_readdir_arg *readdir)
 {
+	unsigned int attrs = FATTR4_WORD0_FILEID | FATTR4_WORD0_TYPE;
 	__be32 *start, *p;
 
 	if (cookie > 2) {
@@ -305,8 +306,9 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
 		memcpy(p, ".\0\0\0", 4);                        /* entry */
 		p++;
 		*p++ = xdr_one;                         /* bitmap length */
-		*p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
-		*p++ = htonl(8);              /* attribute buffer length */
+		*p++ = htonl(attrs);                           /* bitmap */
+		*p++ = htonl(12);             /* attribute buffer length */
+		*p++ = htonl(NF4DIR);
 		p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry)));
 	}
 	
@@ -317,8 +319,9 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
 	memcpy(p, "..\0\0", 4);                         /* entry */
 	p++;
 	*p++ = xdr_one;                         /* bitmap length */
-	*p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
-	*p++ = htonl(8);              /* attribute buffer length */
+	*p++ = htonl(attrs);                           /* bitmap */
+	*p++ = htonl(12);             /* attribute buffer length */
+	*p++ = htonl(NF4DIR);
 	p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry->d_parent)));
 
 	readdir->pgbase = (char *)p - (char *)start;
@@ -1034,11 +1037,11 @@ struct nfs4_opendata {
 	struct nfs4_state *state;
 	struct iattr attrs;
 	unsigned long timestamp;
-	unsigned int rpc_done : 1;
-	unsigned int file_created : 1;
-	unsigned int is_recover : 1;
+	bool rpc_done;
+	bool file_created;
+	bool is_recover;
+	bool cancelled;
 	int rpc_status;
-	int cancelled;
 };
 
 static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server,
@@ -1962,7 +1965,7 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
 		nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid);
 		nfs_confirm_seqid(&data->owner->so_seqid, 0);
 		renew_lease(data->o_res.server, data->timestamp);
-		data->rpc_done = 1;
+		data->rpc_done = true;
 	}
 }
 
@@ -1972,7 +1975,7 @@ static void nfs4_open_confirm_release(void *calldata)
 	struct nfs4_state *state = NULL;
 
 	/* If this request hasn't been cancelled, do nothing */
-	if (data->cancelled == 0)
+	if (!data->cancelled)
 		goto out_free;
 	/* In case of error, no cleanup! */
 	if (!data->rpc_done)
@@ -2015,7 +2018,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 
 	nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1);
 	kref_get(&data->kref);
-	data->rpc_done = 0;
+	data->rpc_done = false;
 	data->rpc_status = 0;
 	data->timestamp = jiffies;
 	if (data->is_recover)
@@ -2025,7 +2028,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 		return PTR_ERR(task);
 	status = rpc_wait_for_completion_task(task);
 	if (status != 0) {
-		data->cancelled = 1;
+		data->cancelled = true;
 		smp_wmb();
 	} else
 		status = data->rpc_status;
@@ -2124,7 +2127,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
 		if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
 			nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	}
-	data->rpc_done = 1;
+	data->rpc_done = true;
 }
 
 static void nfs4_open_release(void *calldata)
@@ -2133,7 +2136,7 @@ static void nfs4_open_release(void *calldata)
 	struct nfs4_state *state = NULL;
 
 	/* If this request hasn't been cancelled, do nothing */
-	if (data->cancelled == 0)
+	if (!data->cancelled)
 		goto out_free;
 	/* In case of error, no cleanup! */
 	if (data->rpc_status != 0 || !data->rpc_done)
@@ -2179,20 +2182,20 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
 
 	nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1);
 	kref_get(&data->kref);
-	data->rpc_done = 0;
+	data->rpc_done = false;
 	data->rpc_status = 0;
-	data->cancelled = 0;
-	data->is_recover = 0;
+	data->cancelled = false;
+	data->is_recover = false;
 	if (isrecover) {
 		nfs4_set_sequence_privileged(&o_arg->seq_args);
-		data->is_recover = 1;
+		data->is_recover = true;
 	}
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 	status = rpc_wait_for_completion_task(task);
 	if (status != 0) {
-		data->cancelled = 1;
+		data->cancelled = true;
 		smp_wmb();
 	} else
 		status = data->rpc_status;
@@ -2287,9 +2290,9 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 
 	if (o_arg->open_flags & O_CREAT) {
 		if (o_arg->open_flags & O_EXCL)
-			data->file_created = 1;
+			data->file_created = true;
 		else if (o_res->cinfo.before != o_res->cinfo.after)
-			data->file_created = 1;
+			data->file_created = true;
 		if (data->file_created || dir->i_version != o_res->cinfo.after)
 			update_changeattr(dir, &o_res->cinfo,
 					o_res->f_attr->time_start);
@@ -3803,6 +3806,54 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, const struct qstr *name,
 	return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
 }
 
+static int _nfs4_proc_lookupp(struct inode *inode,
+		struct nfs_fh *fhandle, struct nfs_fattr *fattr,
+		struct nfs4_label *label)
+{
+	struct rpc_clnt *clnt = NFS_CLIENT(inode);
+	struct nfs_server *server = NFS_SERVER(inode);
+	int		       status;
+	struct nfs4_lookupp_arg args = {
+		.bitmask = server->attr_bitmask,
+		.fh = NFS_FH(inode),
+	};
+	struct nfs4_lookupp_res res = {
+		.server = server,
+		.fattr = fattr,
+		.label = label,
+		.fh = fhandle,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUPP],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+
+	args.bitmask = nfs4_bitmask(server, label);
+
+	nfs_fattr_init(fattr);
+
+	dprintk("NFS call  lookupp ino=0x%lx\n", inode->i_ino);
+	status = nfs4_call_sync(clnt, server, &msg, &args.seq_args,
+				&res.seq_res, 0);
+	dprintk("NFS reply lookupp: %d\n", status);
+	return status;
+}
+
+static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
+			     struct nfs_fattr *fattr, struct nfs4_label *label)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = _nfs4_proc_lookupp(inode, fhandle, fattr, label);
+		trace_nfs4_lookupp(inode, err);
+		err = nfs4_handle_exception(NFS_SERVER(inode), err,
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
@@ -4273,7 +4324,7 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 }
 
 static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
-		u64 cookie, struct page **pages, unsigned int count, int plus)
+		u64 cookie, struct page **pages, unsigned int count, bool plus)
 {
 	struct inode		*dir = d_inode(dentry);
 	struct nfs4_readdir_arg args = {
@@ -4311,7 +4362,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 }
 
 static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
-		u64 cookie, struct page **pages, unsigned int count, int plus)
+		u64 cookie, struct page **pages, unsigned int count, bool plus)
 {
 	struct nfs4_exception exception = { };
 	int err;
@@ -6135,7 +6186,7 @@ static void nfs4_lock_release(void *calldata)
 
 	dprintk("%s: begin!\n", __func__);
 	nfs_free_seqid(data->arg.open_seqid);
-	if (data->cancelled != 0) {
+	if (data->cancelled) {
 		struct rpc_task *task;
 		task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
 				data->arg.lock_seqid);
@@ -6218,7 +6269,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 			nfs4_handle_setlk_error(data->server, data->lsp,
 					data->arg.new_lock_owner, ret);
 	} else
-		data->cancelled = 1;
+		data->cancelled = true;
 	rpc_put_task(task);
 	dprintk("%s: done, ret = %d!\n", __func__, ret);
 	trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret);
@@ -7376,12 +7427,11 @@ static void nfs4_exchange_id_done(struct rpc_task *task, void *data)
 	if (status == 0) {
 		clp->cl_clientid = cdata->res.clientid;
 		clp->cl_exchange_flags = cdata->res.flags;
+		clp->cl_seqid = cdata->res.seqid;
 		/* Client ID is not confirmed */
-		if (!(cdata->res.flags & EXCHGID4_FLAG_CONFIRMED_R)) {
+		if (!(cdata->res.flags & EXCHGID4_FLAG_CONFIRMED_R))
 			clear_bit(NFS4_SESSION_ESTABLISHED,
-			&clp->cl_session->session_state);
-			clp->cl_seqid = cdata->res.seqid;
-		}
+				  &clp->cl_session->session_state);
 
 		kfree(clp->cl_serverowner);
 		clp->cl_serverowner = cdata->res.server_owner;
@@ -9313,6 +9363,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
 	.lookup		= nfs4_proc_lookup,
+	.lookupp	= nfs4_proc_lookupp,
 	.access		= nfs4_proc_access,
 	.readlink	= nfs4_proc_readlink,
 	.create		= nfs4_proc_create,

diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index cbf82b0..0378e225 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c

@@ -352,11 +352,17 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
 	if (clp != *result)
 		return 0;
 
-	/* Purge state if the client id was established in a prior instance */
-	if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R)
-		set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
-	else
-		set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+	/*
+	 * Purge state if the client id was established in a prior
+	 * instance and the client id could not have arrived on the
+	 * server via Transparent State Migration.
+	 */
+	if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R) {
+		if (!test_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags))
+			set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
+		else
+			set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+	}
 	nfs4_schedule_state_manager(clp);
 	status = nfs_wait_client_init_complete(clp);
 	if (status < 0)

diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 845d0ea..be1da19 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h

@@ -891,6 +891,35 @@ DEFINE_NFS4_LOOKUP_EVENT(nfs4_remove);
 DEFINE_NFS4_LOOKUP_EVENT(nfs4_get_fs_locations);
 DEFINE_NFS4_LOOKUP_EVENT(nfs4_secinfo);
 
+TRACE_EVENT(nfs4_lookupp,
+		TP_PROTO(
+			const struct inode *inode,
+			int error
+		),
+
+		TP_ARGS(inode, error),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u64, ino)
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->ino = NFS_FILEID(inode);
+			__entry->error = error;
+		),
+
+		TP_printk(
+			"error=%d (%s) inode=%02x:%02x:%llu",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->ino
+		)
+);
+
 TRACE_EVENT(nfs4_rename,
 		TP_PROTO(
 			const struct inode *olddir,

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 3aebfdc..fa3eb36 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c

@@ -159,6 +159,8 @@ static int nfs4_stat_to_errno(int);
 				(op_decode_hdr_maxsz)
 #define encode_lookup_maxsz	(op_encode_hdr_maxsz + nfs4_name_maxsz)
 #define decode_lookup_maxsz	(op_decode_hdr_maxsz)
+#define encode_lookupp_maxsz	(op_encode_hdr_maxsz)
+#define decode_lookupp_maxsz	(op_decode_hdr_maxsz)
 #define encode_share_access_maxsz \
 				(2)
 #define encode_createmode_maxsz	(1 + encode_attrs_maxsz + encode_verifier_maxsz)
@@ -618,6 +620,18 @@ static int nfs4_stat_to_errno(int);
 				decode_lookup_maxsz + \
 				decode_getattr_maxsz + \
 				decode_getfh_maxsz)
+#define NFS4_enc_lookupp_sz	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
+				encode_putfh_maxsz + \
+				encode_lookupp_maxsz + \
+				encode_getattr_maxsz + \
+				encode_getfh_maxsz)
+#define NFS4_dec_lookupp_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
+				decode_putfh_maxsz + \
+				decode_lookupp_maxsz + \
+				decode_getattr_maxsz + \
+				decode_getfh_maxsz)
 #define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
 				encode_sequence_maxsz + \
 				encode_putrootfh_maxsz + \
@@ -1368,6 +1382,11 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc
 	encode_string(xdr, name->len, name->name);
 }
 
+static void encode_lookupp(struct xdr_stream *xdr, struct compound_hdr *hdr)
+{
+	encode_op_hdr(xdr, OP_LOOKUPP, decode_lookupp_maxsz, hdr);
+}
+
 static void encode_share_access(struct xdr_stream *xdr, u32 share_access)
 {
 	__be32 *p;
@@ -1651,7 +1670,8 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 }
 
 static void
-encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr)
+encode_setacl(struct xdr_stream *xdr, const struct nfs_setaclargs *arg,
+		struct compound_hdr *hdr)
 {
 	__be32 *p;
 
@@ -1735,7 +1755,7 @@ static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, stru
 #if defined(CONFIG_NFS_V4_1)
 /* NFSv4.1 operations */
 static void encode_bind_conn_to_session(struct xdr_stream *xdr,
-				   struct nfs41_bind_conn_to_session_args *args,
+				   const struct nfs41_bind_conn_to_session_args *args,
 				   struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -1748,7 +1768,7 @@ static void encode_bind_conn_to_session(struct xdr_stream *xdr,
 	*p = (args->use_conn_in_rdma_mode) ? cpu_to_be32(1) : cpu_to_be32(0);
 }
 
-static void encode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map)
+static void encode_op_map(struct xdr_stream *xdr, const struct nfs4_op_map *op_map)
 {
 	unsigned int i;
 	encode_uint32(xdr, NFS4_OP_MAP_NUM_WORDS);
@@ -1757,7 +1777,7 @@ static void encode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map)
 }
 
 static void encode_exchange_id(struct xdr_stream *xdr,
-			       struct nfs41_exchange_id_args *args,
+			       const struct nfs41_exchange_id_args *args,
 			       struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -1809,7 +1829,7 @@ static void encode_exchange_id(struct xdr_stream *xdr,
 }
 
 static void encode_create_session(struct xdr_stream *xdr,
-				  struct nfs41_create_session_args *args,
+				  const struct nfs41_create_session_args *args,
 				  struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -1862,7 +1882,7 @@ static void encode_create_session(struct xdr_stream *xdr,
 }
 
 static void encode_destroy_session(struct xdr_stream *xdr,
-				   struct nfs4_session *session,
+				   const struct nfs4_session *session,
 				   struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_DESTROY_SESSION, decode_destroy_session_maxsz, hdr);
@@ -1878,7 +1898,7 @@ static void encode_destroy_clientid(struct xdr_stream *xdr,
 }
 
 static void encode_reclaim_complete(struct xdr_stream *xdr,
-				    struct nfs41_reclaim_complete_args *args,
+				    const struct nfs41_reclaim_complete_args *args,
 				    struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_RECLAIM_COMPLETE, decode_reclaim_complete_maxsz, hdr);
@@ -1974,7 +1994,7 @@ encode_layoutget(struct xdr_stream *xdr,
 static int
 encode_layoutcommit(struct xdr_stream *xdr,
 		    struct inode *inode,
-		    struct nfs4_layoutcommit_args *args,
+		    const struct nfs4_layoutcommit_args *args,
 		    struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -2044,7 +2064,7 @@ encode_secinfo_no_name(struct xdr_stream *xdr,
 }
 
 static void encode_test_stateid(struct xdr_stream *xdr,
-				struct nfs41_test_stateid_args *args,
+				const struct nfs41_test_stateid_args *args,
 				struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr);
@@ -2053,7 +2073,7 @@ static void encode_test_stateid(struct xdr_stream *xdr,
 }
 
 static void encode_free_stateid(struct xdr_stream *xdr,
-				struct nfs41_free_stateid_args *args,
+				const struct nfs41_free_stateid_args *args,
 				struct compound_hdr *hdr)
 {
 	encode_op_hdr(xdr, OP_FREE_STATEID, decode_free_stateid_maxsz, hdr);
@@ -2086,8 +2106,9 @@ static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
  * Encode an ACCESS request
  */
 static void nfs4_xdr_enc_access(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs4_accessargs *args)
+				const void *data)
 {
+	const struct nfs4_accessargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2104,8 +2125,9 @@ static void nfs4_xdr_enc_access(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode LOOKUP request
  */
 static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs4_lookup_arg *args)
+				const void *data)
 {
+	const struct nfs4_lookup_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2120,12 +2142,33 @@ static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr,
 }
 
 /*
+ * Encode LOOKUPP request
+ */
+static void nfs4_xdr_enc_lookupp(struct rpc_rqst *req, struct xdr_stream *xdr,
+		const void *data)
+{
+	const struct nfs4_lookupp_arg *args = data;
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_lookupp(xdr, &hdr);
+	encode_getfh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_nops(&hdr);
+}
+
+/*
  * Encode LOOKUP_ROOT request
  */
 static void nfs4_xdr_enc_lookup_root(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs4_lookup_root_arg *args)
+				     const void *data)
 {
+	const struct nfs4_lookup_root_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2142,8 +2185,9 @@ static void nfs4_xdr_enc_lookup_root(struct rpc_rqst *req,
  * Encode REMOVE request
  */
 static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs_removeargs *args)
+				const void *data)
 {
+	const struct nfs_removeargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2159,8 +2203,9 @@ static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode RENAME request
  */
 static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs_renameargs *args)
+				const void *data)
 {
+	const struct nfs_renameargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2178,8 +2223,9 @@ static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode LINK request
  */
 static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
-			     const struct nfs4_link_arg *args)
+			      const void *data)
 {
+	const struct nfs4_link_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2199,8 +2245,9 @@ static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode CREATE request
  */
 static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs4_create_arg *args)
+				const void *data)
 {
+	const struct nfs4_create_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2218,8 +2265,10 @@ static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode SYMLINK request
  */
 static void nfs4_xdr_enc_symlink(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 const struct nfs4_create_arg *args)
+				 const void *data)
 {
+	const struct nfs4_create_arg *args = data;
+
 	nfs4_xdr_enc_create(req, xdr, args);
 }
 
@@ -2227,8 +2276,9 @@ static void nfs4_xdr_enc_symlink(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode GETATTR request
  */
 static void nfs4_xdr_enc_getattr(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 const struct nfs4_getattr_arg *args)
+				 const void *data)
 {
+	const struct nfs4_getattr_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2244,8 +2294,9 @@ static void nfs4_xdr_enc_getattr(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a CLOSE request
  */
 static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_closeargs *args)
+			       const void *data)
 {
+	const struct nfs_closeargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2265,8 +2316,9 @@ static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode an OPEN request
  */
 static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
-			      struct nfs_openargs *args)
+			      const void *data)
 {
+	const struct nfs_openargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2287,8 +2339,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_open_confirm(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs_open_confirmargs *args)
+				      const void *data)
 {
+	const struct nfs_open_confirmargs *args = data;
 	struct compound_hdr hdr = {
 		.nops   = 0,
 	};
@@ -2304,8 +2357,9 @@ static void nfs4_xdr_enc_open_confirm(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs_openargs *args)
+				     const void *data)
 {
+	const struct nfs_openargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2325,8 +2379,9 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req,
 					struct xdr_stream *xdr,
-					struct nfs_closeargs *args)
+					const void *data)
 {
+	const struct nfs_closeargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2344,8 +2399,9 @@ static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req,
  * Encode a LOCK request
  */
 static void nfs4_xdr_enc_lock(struct rpc_rqst *req, struct xdr_stream *xdr,
-			      struct nfs_lock_args *args)
+			      const void *data)
 {
+	const struct nfs_lock_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2361,8 +2417,9 @@ static void nfs4_xdr_enc_lock(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a LOCKT request
  */
 static void nfs4_xdr_enc_lockt(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_lockt_args *args)
+			       const void *data)
 {
+	const struct nfs_lockt_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2378,8 +2435,9 @@ static void nfs4_xdr_enc_lockt(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a LOCKU request
  */
 static void nfs4_xdr_enc_locku(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_locku_args *args)
+			       const void *data)
 {
+	const struct nfs_locku_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2393,8 +2451,9 @@ static void nfs4_xdr_enc_locku(struct rpc_rqst *req, struct xdr_stream *xdr,
 
 static void nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req,
 					   struct xdr_stream *xdr,
-					struct nfs_release_lockowner_args *args)
+					   const void *data)
 {
+	const struct nfs_release_lockowner_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = 0,
 	};
@@ -2408,8 +2467,9 @@ static void nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req,
  * Encode a READLINK request
  */
 static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  const struct nfs4_readlink *args)
+				  const void *data)
 {
+	const struct nfs4_readlink *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2428,8 +2488,9 @@ static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a READDIR request
  */
 static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 const struct nfs4_readdir_arg *args)
+				 const void *data)
 {
+	const struct nfs4_readdir_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2451,8 +2512,9 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a READ request
  */
 static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
-			      struct nfs_pgio_args *args)
+			      const void *data)
 {
+	const struct nfs_pgio_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2472,8 +2534,9 @@ static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode an SETATTR request
  */
 static void nfs4_xdr_enc_setattr(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_setattrargs *args)
+				 const void *data)
 {
+	const struct nfs_setattrargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2490,8 +2553,9 @@ static void nfs4_xdr_enc_setattr(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a GETACL request
  */
 static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_getaclargs *args)
+				const void *data)
 {
+	const struct nfs_getaclargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2513,8 +2577,9 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a WRITE request
  */
 static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_pgio_args *args)
+			       const void *data)
 {
+	const struct nfs_pgio_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2533,8 +2598,9 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
  *  a COMMIT request
  */
 static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_commitargs *args)
+				const void *data)
 {
+	const struct nfs_commitargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2550,8 +2616,9 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
  * FSINFO request
  */
 static void nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs4_fsinfo_arg *args)
+				const void *data)
 {
+	const struct nfs4_fsinfo_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2567,8 +2634,9 @@ static void nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
  * a PATHCONF request
  */
 static void nfs4_xdr_enc_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  const struct nfs4_pathconf_arg *args)
+				  const void *data)
 {
+	const struct nfs4_pathconf_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2585,8 +2653,9 @@ static void nfs4_xdr_enc_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
  * a STATFS request
  */
 static void nfs4_xdr_enc_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
-				const struct nfs4_statfs_arg *args)
+				const void *data)
 {
+	const struct nfs4_statfs_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2604,8 +2673,9 @@ static void nfs4_xdr_enc_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs4_server_caps_arg *args)
+				     const void *data)
 {
+	const struct nfs4_server_caps_arg *args = data;
 	const u32 *bitmask = args->bitmask;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2622,8 +2692,10 @@ static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
  * a RENEW request
  */
 static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_client *clp)
+			       const void *data)
+
 {
+	const struct nfs_client *clp = data;
 	struct compound_hdr hdr = {
 		.nops	= 0,
 	};
@@ -2638,8 +2710,9 @@ static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_setclientid(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs4_setclientid *sc)
+				     const void *data)
 {
+	const struct nfs4_setclientid *sc = data;
 	struct compound_hdr hdr = {
 		.nops	= 0,
 	};
@@ -2654,8 +2727,9 @@ static void nfs4_xdr_enc_setclientid(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req,
 					     struct xdr_stream *xdr,
-					     struct nfs4_setclientid_res *arg)
+					     const void *data)
 {
+	const struct nfs4_setclientid_res *arg = data;
 	struct compound_hdr hdr = {
 		.nops	= 0,
 	};
@@ -2670,8 +2744,9 @@ static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     const struct nfs4_delegreturnargs *args)
+				     const void *data)
 {
+	const struct nfs4_delegreturnargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2692,8 +2767,9 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs4_fs_locations_arg *args)
+				      const void *data)
 {
+	const struct nfs4_fs_locations_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2715,8 +2791,8 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
 	}
 
 	/* Set up reply kvec to capture returned fs_locations array. */
-	xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page,
-			0, PAGE_SIZE);
+	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
+			 (struct page **)&args->page, 0, PAGE_SIZE);
 	encode_nops(&hdr);
 }
 
@@ -2725,8 +2801,9 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
 				struct xdr_stream *xdr,
-				struct nfs4_secinfo_arg *args)
+				const void *data)
 {
+	const struct nfs4_secinfo_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2743,8 +2820,9 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_fsid_present(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs4_fsid_present_arg *args)
+				      const void *data)
 {
+	const struct nfs4_fsid_present_arg *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2764,8 +2842,9 @@ static void nfs4_xdr_enc_fsid_present(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_bind_conn_to_session(struct rpc_rqst *req,
 				struct xdr_stream *xdr,
-				struct nfs41_bind_conn_to_session_args *args)
+				const void *data)
 {
+	const struct nfs41_bind_conn_to_session_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = args->client->cl_mvops->minor_version,
 	};
@@ -2780,8 +2859,9 @@ static void nfs4_xdr_enc_bind_conn_to_session(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs41_exchange_id_args *args)
+				     const void *data)
 {
+	const struct nfs41_exchange_id_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = args->client->cl_mvops->minor_version,
 	};
@@ -2796,8 +2876,9 @@ static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_create_session(struct rpc_rqst *req,
 					struct xdr_stream *xdr,
-					struct nfs41_create_session_args *args)
+					const void *data)
 {
+	const struct nfs41_create_session_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = args->client->cl_mvops->minor_version,
 	};
@@ -2812,8 +2893,9 @@ static void nfs4_xdr_enc_create_session(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
 					 struct xdr_stream *xdr,
-					 struct nfs4_session *session)
+					 const void *data)
 {
+	const struct nfs4_session *session = data;
 	struct compound_hdr hdr = {
 		.minorversion = session->clp->cl_mvops->minor_version,
 	};
@@ -2828,8 +2910,9 @@ static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_destroy_clientid(struct rpc_rqst *req,
 					 struct xdr_stream *xdr,
-					 struct nfs_client *clp)
+					 const void *data)
 {
+	const struct nfs_client *clp = data;
 	struct compound_hdr hdr = {
 		.minorversion = clp->cl_mvops->minor_version,
 	};
@@ -2843,8 +2926,9 @@ static void nfs4_xdr_enc_destroy_clientid(struct rpc_rqst *req,
  * a SEQUENCE request
  */
 static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  struct nfs4_sequence_args *args)
+				  const void *data)
 {
+	const struct nfs4_sequence_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(args),
 	};
@@ -2859,8 +2943,9 @@ static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req,
 					struct xdr_stream *xdr,
-					struct nfs4_get_lease_time_args *args)
+					const void *data)
 {
+	const struct nfs4_get_lease_time_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
 	};
@@ -2878,8 +2963,9 @@ static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req,
 					  struct xdr_stream *xdr,
-				struct nfs41_reclaim_complete_args *args)
+					  const void *data)
 {
+	const struct nfs41_reclaim_complete_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args)
 	};
@@ -2895,8 +2981,9 @@ static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
 				       struct xdr_stream *xdr,
-				       struct nfs4_getdeviceinfo_args *args)
+				       const void *data)
 {
+	const struct nfs4_getdeviceinfo_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2919,8 +3006,9 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   struct nfs4_layoutget_args *args)
+				   const void *data)
 {
+	const struct nfs4_layoutget_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2941,8 +3029,9 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs4_layoutcommit_args *args)
+				      const void *priv)
 {
+	const struct nfs4_layoutcommit_args *args = priv;
 	struct nfs4_layoutcommit_data *data =
 		container_of(args, struct nfs4_layoutcommit_data, args);
 	struct compound_hdr hdr = {
@@ -2962,8 +3051,9 @@ static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs4_layoutreturn_args *args)
+				      const void *data)
 {
+	const struct nfs4_layoutreturn_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2978,10 +3068,11 @@ static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
 /*
  * Encode SECINFO_NO_NAME request
  */
-static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
+static void nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
 					struct xdr_stream *xdr,
-					struct nfs41_secinfo_no_name_args *args)
+					const void *data)
 {
+	const struct nfs41_secinfo_no_name_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -2991,7 +3082,6 @@ static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
 	encode_putrootfh(xdr, &hdr);
 	encode_secinfo_no_name(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
@@ -2999,8 +3089,9 @@ static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req,
 				      struct xdr_stream *xdr,
-				      struct nfs41_test_stateid_args *args)
+				      const void *data)
 {
+	const struct nfs41_test_stateid_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -3016,8 +3107,9 @@ static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req,
  */
 static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs41_free_stateid_args *args)
+				     const void *data)
 {
+	const struct nfs41_free_stateid_args *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -5005,6 +5097,11 @@ static int decode_lookup(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_LOOKUP);
 }
 
+static int decode_lookupp(struct xdr_stream *xdr)
+{
+	return decode_op_hdr(xdr, OP_LOOKUPP);
+}
+
 /* This is too sick! */
 static int decode_space_limit(struct xdr_stream *xdr,
 		unsigned long *pagemod_limit)
@@ -6101,8 +6198,9 @@ int decode_layoutreturn(struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
 				       struct xdr_stream *xdr,
-				       struct nfs_closeres *res)
+				       void *data)
 {
+	struct nfs_closeres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6130,8 +6228,9 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
  * Decode ACCESS response
  */
 static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs4_accessres *res)
+			       void *data)
 {
+	struct nfs4_accessres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6156,8 +6255,9 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode LOOKUP response
  */
 static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs4_lookup_res *res)
+			       void *data)
 {
+	struct nfs4_lookup_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6182,12 +6282,43 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 }
 
 /*
+ * Decode LOOKUPP response
+ */
+static int nfs4_xdr_dec_lookupp(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		void *data)
+{
+	struct nfs4_lookupp_res *res = data;
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_lookupp(xdr);
+	if (status)
+		goto out;
+	status = decode_getfh(xdr, res->fh);
+	if (status)
+		goto out;
+	status = decode_getfattr_label(xdr, res->fattr, res->label, res->server);
+out:
+	return status;
+}
+
+/*
  * Decode LOOKUP_ROOT response
  */
 static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
 				    struct xdr_stream *xdr,
-				    struct nfs4_lookup_res *res)
+				    void *data)
 {
+	struct nfs4_lookup_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6212,8 +6343,9 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
  * Decode REMOVE response
  */
 static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs_removeres *res)
+			       void *data)
 {
+	struct nfs_removeres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6235,8 +6367,9 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode RENAME response
  */
 static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs_renameres *res)
+			       void *data)
 {
+	struct nfs_renameres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6264,8 +6397,9 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode LINK response
  */
 static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			     struct nfs4_link_res *res)
+			     void *data)
 {
+	struct nfs4_link_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6303,8 +6437,9 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode CREATE response
  */
 static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs4_create_res *res)
+			       void *data)
 {
+	struct nfs4_create_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6332,7 +6467,7 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode SYMLINK response
  */
 static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-				struct nfs4_create_res *res)
+				void *res)
 {
 	return nfs4_xdr_dec_create(rqstp, xdr, res);
 }
@@ -6341,8 +6476,9 @@ static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode GETATTR response
  */
 static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-				struct nfs4_getattr_res *res)
+				void *data)
 {
+	struct nfs4_getattr_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6364,8 +6500,9 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Encode an SETACL request
  */
 static void nfs4_xdr_enc_setacl(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_setaclargs *args)
+				const void *data)
 {
+	const struct nfs_setaclargs *args = data;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
@@ -6382,8 +6519,9 @@ static void nfs4_xdr_enc_setacl(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static int
 nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-		    struct nfs_setaclres *res)
+		    void *data)
 {
+	struct nfs_setaclres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6406,8 +6544,9 @@ nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  */
 static int
 nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-		    struct nfs_getaclres *res)
+		    void *data)
 {
+	struct nfs_getaclres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6434,8 +6573,9 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode CLOSE response
  */
 static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			      struct nfs_closeres *res)
+			      void *data)
 {
+	struct nfs_closeres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6468,8 +6608,9 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode OPEN response
  */
 static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			     struct nfs_openres *res)
+			     void *data)
 {
+	struct nfs_openres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6500,8 +6641,9 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs_open_confirmres *res)
+				     void *data)
 {
+	struct nfs_open_confirmres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6521,8 +6663,9 @@ static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
 				    struct xdr_stream *xdr,
-				    struct nfs_openres *res)
+				    void *data)
 {
+	struct nfs_openres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6550,8 +6693,9 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct nfs_setattrres *res)
+				void *data)
 {
+	struct nfs_setattrres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6576,8 +6720,9 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
  * Decode LOCK response
  */
 static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			     struct nfs_lock_res *res)
+			     void *data)
 {
+	struct nfs_lock_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6599,8 +6744,9 @@ static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode LOCKT response
  */
 static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			      struct nfs_lockt_res *res)
+			      void *data)
 {
+	struct nfs_lockt_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6622,8 +6768,9 @@ static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode LOCKU response
  */
 static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			      struct nfs_locku_res *res)
+			      void *data)
 {
+	struct nfs_locku_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6658,8 +6805,9 @@ static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp,
 				 struct xdr_stream *xdr,
-				 struct nfs4_readlink_res *res)
+				 void *data)
 {
+	struct nfs4_readlink_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6681,8 +6829,9 @@ static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp,
  * Decode READDIR response
  */
 static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-				struct nfs4_readdir_res *res)
+				void *data)
 {
+	struct nfs4_readdir_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6704,8 +6853,9 @@ static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode Read response
  */
 static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			     struct nfs_pgio_res *res)
+			     void *data)
 {
+	struct nfs_pgio_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6730,8 +6880,9 @@ static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode WRITE response
  */
 static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			      struct nfs_pgio_res *res)
+			      void *data)
 {
+	struct nfs_pgio_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6760,8 +6911,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode COMMIT response
  */
 static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			       struct nfs_commitres *res)
+			       void *data)
 {
+	struct nfs_commitres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6784,8 +6936,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  * Decode FSINFO response
  */
 static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs4_fsinfo_res *res)
+			       void *data)
 {
+	struct nfs4_fsinfo_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6803,8 +6956,9 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Decode PATHCONF response
  */
 static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs4_pathconf_res *res)
+				 void *data)
 {
+	struct nfs4_pathconf_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6822,8 +6976,9 @@ static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Decode STATFS response
  */
 static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs4_statfs_res *res)
+			       void *data)
 {
+	struct nfs4_statfs_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6842,8 +6997,9 @@ static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs4_server_caps_res *res)
+				    void *data)
 {
+	struct nfs4_server_caps_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6881,8 +7037,9 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    struct nfs4_setclientid_res *res)
+				    void *data)
 {
+	struct nfs4_setclientid_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6896,7 +7053,8 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req,
  * Decode SETCLIENTID_CONFIRM response
  */
 static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req,
-					    struct xdr_stream *xdr)
+					    struct xdr_stream *xdr,
+					    void *data)
 {
 	struct compound_hdr hdr;
 	int status;
@@ -6912,8 +7070,9 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req,
  */
 static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
 				    struct xdr_stream *xdr,
-				    struct nfs4_delegreturnres *res)
+				    void *data)
 {
+	struct nfs4_delegreturnres *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6947,8 +7106,9 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
 				     struct xdr_stream *xdr,
-				     struct nfs4_fs_locations_res *res)
+				     void *data)
 {
+	struct nfs4_fs_locations_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -6990,8 +7150,9 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
  */
 static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct nfs4_secinfo_res *res)
+				void *data)
 {
+	struct nfs4_secinfo_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7014,8 +7175,9 @@ static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_fsid_present(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs4_fsid_present_res *res)
+				     void *data)
 {
+	struct nfs4_fsid_present_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7075,7 +7237,7 @@ static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp,
 				       struct xdr_stream *xdr,
-				       struct nfs41_create_session_res *res)
+				       void *res)
 {
 	struct compound_hdr hdr;
 	int status;
@@ -7123,7 +7285,7 @@ static int nfs4_xdr_dec_destroy_clientid(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
 				 struct xdr_stream *xdr,
-				 struct nfs4_sequence_res *res)
+				 void *res)
 {
 	struct compound_hdr hdr;
 	int status;
@@ -7139,8 +7301,9 @@ static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp,
 				       struct xdr_stream *xdr,
-				       struct nfs4_get_lease_time_res *res)
+				       void *data)
 {
+	struct nfs4_get_lease_time_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7159,8 +7322,9 @@ static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
 					 struct xdr_stream *xdr,
-					 struct nfs41_reclaim_complete_res *res)
+					 void *data)
 {
+	struct nfs41_reclaim_complete_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7177,8 +7341,9 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp,
 				      struct xdr_stream *xdr,
-				      struct nfs4_getdeviceinfo_res *res)
+				      void *data)
 {
+	struct nfs4_getdeviceinfo_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7198,8 +7363,9 @@ static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
 				  struct xdr_stream *xdr,
-				  struct nfs4_layoutget_res *res)
+				  void *data)
 {
+	struct nfs4_layoutget_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7222,8 +7388,9 @@ static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs4_layoutreturn_res *res)
+				     void *data)
 {
+	struct nfs4_layoutreturn_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7246,8 +7413,9 @@ static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs4_layoutcommit_res *res)
+				     void *data)
 {
+	struct nfs4_layoutcommit_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7273,8 +7441,9 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp,
 					struct xdr_stream *xdr,
-					struct nfs4_secinfo_res *res)
+					void *data)
 {
+	struct nfs4_secinfo_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7297,8 +7466,9 @@ static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs41_test_stateid_res *res)
+				     void *data)
 {
+	struct nfs41_test_stateid_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7318,8 +7488,9 @@ static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp,
  */
 static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp,
 				     struct xdr_stream *xdr,
-				     struct nfs41_free_stateid_res *res)
+				     void *data)
 {
+	struct nfs41_free_stateid_res *res = data;
 	struct compound_hdr hdr;
 	int status;
 
@@ -7350,7 +7521,7 @@ static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp,
  * on a directory already in our cache.
  */
 int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
-		       int plus)
+		       bool plus)
 {
 	unsigned int savep;
 	uint32_t bitmap[3] = {0};
@@ -7484,8 +7655,8 @@ nfs4_stat_to_errno(int stat)
 #define PROC(proc, argtype, restype)				\
 [NFSPROC4_CLNT_##proc] = {					\
 	.p_proc   = NFSPROC4_COMPOUND,				\
-	.p_encode = (kxdreproc_t)nfs4_xdr_##argtype,		\
-	.p_decode = (kxdrdproc_t)nfs4_xdr_##restype,		\
+	.p_encode = nfs4_xdr_##argtype,				\
+	.p_decode = nfs4_xdr_##restype,				\
 	.p_arglen = NFS4_##argtype##_sz,			\
 	.p_replen = NFS4_##restype##_sz,			\
 	.p_statidx = NFSPROC4_CLNT_##proc,			\
@@ -7497,7 +7668,7 @@ nfs4_stat_to_errno(int stat)
 	.p_name = #proc,	\
 }
 
-struct rpc_procinfo	nfs4_procedures[] = {
+const struct rpc_procinfo nfs4_procedures[] = {
 	PROC(READ,		enc_read,		dec_read),
 	PROC(WRITE,		enc_write,		dec_write),
 	PROC(COMMIT,		enc_commit,		dec_commit),
@@ -7517,6 +7688,7 @@ struct rpc_procinfo	nfs4_procedures[] = {
 	PROC(ACCESS,		enc_access,		dec_access),
 	PROC(GETATTR,		enc_getattr,		dec_getattr),
 	PROC(LOOKUP,		enc_lookup,		dec_lookup),
+	PROC(LOOKUPP,		enc_lookupp,		dec_lookupp),
 	PROC(LOOKUP_ROOT,	enc_lookup_root,	dec_lookup_root),
 	PROC(REMOVE,		enc_remove,		dec_remove),
 	PROC(RENAME,		enc_rename,		dec_rename),
@@ -7564,10 +7736,12 @@ struct rpc_procinfo	nfs4_procedures[] = {
 #endif /* CONFIG_NFS_V4_2 */
 };
 
+static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
 const struct rpc_version nfs_version4 = {
 	.number			= 4,
 	.nrprocs		= ARRAY_SIZE(nfs4_procedures),
-	.procs			= nfs4_procedures
+	.procs			= nfs4_procedures,
+	.counts			= nfs_version4_counts,
 };
 
 /*

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ad92b40..de9066a 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c

@@ -50,8 +50,8 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 	hdr->cred = hdr->req->wb_context->cred;
 	hdr->io_start = req_offset(hdr->req);
 	hdr->good_bytes = mirror->pg_count;
+	hdr->io_completion = desc->pg_io_completion;
 	hdr->dreq = desc->pg_dreq;
-	hdr->layout_private = desc->pg_layout_private;
 	hdr->release = release;
 	hdr->completion_ops = desc->pg_completion_ops;
 	if (hdr->completion_ops->init_hdr)
@@ -155,9 +155,12 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock)
 	if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags))
 		return 0;
 
-	if (!nonblock)
+	if (!nonblock) {
+		set_bit(PG_CONTENDED1, &head->wb_flags);
+		smp_mb__after_atomic();
 		return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
 				TASK_UNINTERRUPTIBLE);
+	}
 
 	return -EAGAIN;
 }
@@ -175,6 +178,10 @@ nfs_page_group_lock_wait(struct nfs_page *req)
 
 	WARN_ON_ONCE(head != head->wb_head);
 
+	if (!test_bit(PG_HEADLOCK, &head->wb_flags))
+		return;
+	set_bit(PG_CONTENDED1, &head->wb_flags);
+	smp_mb__after_atomic();
 	wait_on_bit(&head->wb_flags, PG_HEADLOCK,
 		TASK_UNINTERRUPTIBLE);
 }
@@ -193,6 +200,8 @@ nfs_page_group_unlock(struct nfs_page *req)
 	smp_mb__before_atomic();
 	clear_bit(PG_HEADLOCK, &head->wb_flags);
 	smp_mb__after_atomic();
+	if (!test_bit(PG_CONTENDED1, &head->wb_flags))
+		return;
 	wake_up_bit(&head->wb_flags, PG_HEADLOCK);
 }
 
@@ -383,6 +392,8 @@ void nfs_unlock_request(struct nfs_page *req)
 	smp_mb__before_atomic();
 	clear_bit(PG_BUSY, &req->wb_flags);
 	smp_mb__after_atomic();
+	if (!test_bit(PG_CONTENDED2, &req->wb_flags))
+		return;
 	wake_up_bit(&req->wb_flags, PG_BUSY);
 }
 
@@ -465,6 +476,10 @@ void nfs_release_request(struct nfs_page *req)
 int
 nfs_wait_on_request(struct nfs_page *req)
 {
+	if (!test_bit(PG_BUSY, &req->wb_flags))
+		return 0;
+	set_bit(PG_CONTENDED2, &req->wb_flags);
+	smp_mb__after_atomic();
 	return wait_on_bit_io(&req->wb_flags, PG_BUSY,
 			      TASK_UNINTERRUPTIBLE);
 }
@@ -710,8 +725,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	desc->pg_ioflags = io_flags;
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
+	desc->pg_io_completion = NULL;
 	desc->pg_dreq = NULL;
-	desc->pg_layout_private = NULL;
 	desc->pg_bsize = bsize;
 
 	desc->pg_mirror_count = 1;
@@ -779,6 +794,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 	gfp_t gfp_flags = GFP_KERNEL;
 
 	pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
+	pg_array->npages = pagecount;
 
 	if (pagecount <= ARRAY_SIZE(pg_array->page_array))
 		pg_array->pagevec = pg_array->page_array;
@@ -1233,6 +1249,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
 {
 	LIST_HEAD(failed);
 
+	desc->pg_io_completion = hdr->io_completion;
 	desc->pg_dreq = hdr->dreq;
 	while (!list_empty(&hdr->pages)) {
 		struct nfs_page *req = nfs_list_entry(hdr->pages.next);

diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 9872cf6..7962e49 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c

@@ -485,7 +485,7 @@ nfs_proc_rmdir(struct inode *dir, const struct qstr *name)
  */
 static int
 nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
-		 u64 cookie, struct page **pages, unsigned int count, int plus)
+		 u64 cookie, struct page **pages, unsigned int count, bool plus)
 {
 	struct inode		*dir = d_inode(dentry);
 	struct nfs_readdirargs	arg = {

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index c5334c0..d828ef8 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c

@@ -879,7 +879,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root)
 	if (nfss->options & NFS_OPTION_FSCACHE) {
 		seq_printf(m, "\n\tfsc:\t");
 		for (i = 0; i < __NFSIOS_FSCACHEMAX; i++)
-			seq_printf(m, "%Lu ", totals.bytes[i]);
+			seq_printf(m, "%Lu ", totals.fscache[i]);
 	}
 #endif
 	seq_printf(m, "\n");
@@ -2339,6 +2339,7 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
 		 */
 		sb->s_flags |= MS_POSIXACL;
 		sb->s_time_gran = 1;
+		sb->s_export_op = &nfs_export_ops;
 	}
 
  	nfs_initialise_sb(sb);
@@ -2360,6 +2361,7 @@ static void nfs_clone_super(struct super_block *sb,
 	sb->s_xattr = old_sb->s_xattr;
 	sb->s_op = old_sb->s_op;
 	sb->s_time_gran = 1;
+	sb->s_export_op = old_sb->s_export_op;
 
 	if (server->nfs_client->rpc_ops->version != 2) {
 		/* The VFS shouldn't apply the umask to mode bits. We will do

diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 191aa57..e3949d9 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c

@@ -288,6 +288,19 @@ static void nfs_async_rename_release(void *calldata)
 	if (d_really_is_positive(data->old_dentry))
 		nfs_mark_for_revalidate(d_inode(data->old_dentry));
 
+	/* The result of the rename is unknown. Play it safe by
+	 * forcing a new lookup */
+	if (data->cancelled) {
+		spin_lock(&data->old_dir->i_lock);
+		nfs_force_lookup_revalidate(data->old_dir);
+		spin_unlock(&data->old_dir->i_lock);
+		if (data->new_dir != data->old_dir) {
+			spin_lock(&data->new_dir->i_lock);
+			nfs_force_lookup_revalidate(data->new_dir);
+			spin_unlock(&data->new_dir->i_lock);
+		}
+	}
+
 	dput(data->old_dentry);
 	dput(data->new_dentry);
 	iput(data->old_dir);

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index db7ba54..b1af5de 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c

@@ -40,6 +40,12 @@
 #define MIN_POOL_WRITE		(32)
 #define MIN_POOL_COMMIT		(4)
 
+struct nfs_io_completion {
+	void (*complete)(void *data);
+	void *data;
+	struct kref refcount;
+};
+
 /*
  * Local function declarations
  */
@@ -108,6 +114,39 @@ static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
 	mempool_free(hdr, nfs_wdata_mempool);
 }
 
+static struct nfs_io_completion *nfs_io_completion_alloc(gfp_t gfp_flags)
+{
+	return kmalloc(sizeof(struct nfs_io_completion), gfp_flags);
+}
+
+static void nfs_io_completion_init(struct nfs_io_completion *ioc,
+		void (*complete)(void *), void *data)
+{
+	ioc->complete = complete;
+	ioc->data = data;
+	kref_init(&ioc->refcount);
+}
+
+static void nfs_io_completion_release(struct kref *kref)
+{
+	struct nfs_io_completion *ioc = container_of(kref,
+			struct nfs_io_completion, refcount);
+	ioc->complete(ioc->data);
+	kfree(ioc);
+}
+
+static void nfs_io_completion_get(struct nfs_io_completion *ioc)
+{
+	if (ioc != NULL)
+		kref_get(&ioc->refcount);
+}
+
+static void nfs_io_completion_put(struct nfs_io_completion *ioc)
+{
+	if (ioc != NULL)
+		kref_put(&ioc->refcount, nfs_io_completion_release);
+}
+
 static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
 {
 	ctx->error = error;
@@ -681,18 +720,29 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
 	return ret;
 }
 
+static void nfs_io_completion_commit(void *inode)
+{
+	nfs_commit_inode(inode, 0);
+}
+
 int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
 	struct inode *inode = mapping->host;
 	struct nfs_pageio_descriptor pgio;
+	struct nfs_io_completion *ioc = nfs_io_completion_alloc(GFP_NOFS);
 	int err;
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 
+	if (ioc)
+		nfs_io_completion_init(ioc, nfs_io_completion_commit, inode);
+
 	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
 				&nfs_async_write_completion_ops);
+	pgio.pg_io_completion = ioc;
 	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
 	nfs_pageio_complete(&pgio);
+	nfs_io_completion_put(ioc);
 
 	if (err < 0)
 		goto out_err;
@@ -940,6 +990,11 @@ int nfs_write_need_commit(struct nfs_pgio_header *hdr)
 	return hdr->verf.committed != NFS_FILE_SYNC;
 }
 
+static void nfs_async_write_init(struct nfs_pgio_header *hdr)
+{
+	nfs_io_completion_get(hdr->io_completion);
+}
+
 static void nfs_write_completion(struct nfs_pgio_header *hdr)
 {
 	struct nfs_commit_info cinfo;
@@ -973,6 +1028,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 		nfs_release_request(req);
 	}
 out:
+	nfs_io_completion_put(hdr->io_completion);
 	hdr->release(hdr);
 }
 
@@ -1378,6 +1434,7 @@ static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
 }
 
 static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
+	.init_hdr = nfs_async_write_init,
 	.error_cleanup = nfs_async_write_error,
 	.completion = nfs_write_completion,
 	.reschedule_io = nfs_async_write_reschedule_io,
@@ -1884,7 +1941,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 		/* Don't commit yet if this is a non-blocking flush and there
 		 * are a lot of outstanding writes for this mapping.
 		 */
-		if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1))
+		if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
 			goto out_mark_dirty;
 
 		/* don't wait for the COMMIT response */

diff --git a/fs/nfsd/current_stateid.h b/fs/nfsd/current_stateid.h
index 4123551..34075ce 100644
--- a/fs/nfsd/current_stateid.h
+++ b/fs/nfsd/current_stateid.h

@@ -8,21 +8,33 @@ extern void clear_current_stateid(struct nfsd4_compound_state *cstate);
 /*
  * functions to set current state id
  */
-extern void nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *);
-extern void nfsd4_set_openstateid(struct nfsd4_compound_state *, struct nfsd4_open *);
-extern void nfsd4_set_lockstateid(struct nfsd4_compound_state *, struct nfsd4_lock *);
-extern void nfsd4_set_closestateid(struct nfsd4_compound_state *, struct nfsd4_close *);
+extern void nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_set_openstateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_set_lockstateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_set_closestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
 
 /*
  * functions to consume current state id
  */
-extern void nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *);
-extern void nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *, struct nfsd4_delegreturn *);
-extern void nfsd4_get_freestateid(struct nfsd4_compound_state *, struct nfsd4_free_stateid *);
-extern void nfsd4_get_setattrstateid(struct nfsd4_compound_state *, struct nfsd4_setattr *);
-extern void nfsd4_get_closestateid(struct nfsd4_compound_state *, struct nfsd4_close *);
-extern void nfsd4_get_lockustateid(struct nfsd4_compound_state *, struct nfsd4_locku *);
-extern void nfsd4_get_readstateid(struct nfsd4_compound_state *, struct nfsd4_read *);
-extern void nfsd4_get_writestateid(struct nfsd4_compound_state *, struct nfsd4_write *);
+extern void nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_freestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_setattrstateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_closestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_lockustateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_readstateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
+extern void nfsd4_get_writestateid(struct nfsd4_compound_state *,
+		union nfsd4_op_u *);
 
 #endif   /* _NFSD4_CURRENT_STATE_H */

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 838f90f..6276ec8 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c

@@ -19,7 +19,7 @@
  * NULL call.
  */
 static __be32
-nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsacld_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -27,9 +27,10 @@ nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 /*
  * Get the Access and/or Default ACL of a file.
  */
-static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
-		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+static __be32 nfsacld_proc_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct posix_acl *acl;
 	struct inode *inode;
 	svc_fh *fh;
@@ -87,10 +88,10 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
 /*
  * Set the Access and/or Default ACL of a file.
  */
-static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
-		struct nfsd3_setaclargs *argp,
-		struct nfsd_attrstat *resp)
+static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	struct inode *inode;
 	svc_fh *fh;
 	__be32 nfserr = 0;
@@ -141,9 +142,10 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
 /*
  * Check file attributes
  */
-static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
-		struct nfsd_fhandle *argp, struct nfsd_attrstat *resp)
+static __be32 nfsacld_proc_getattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	__be32 nfserr;
 	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
 
@@ -158,9 +160,10 @@ static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
 /*
  * Check file access
  */
-static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
-		struct nfsd3_accessres *resp)
+static __be32 nfsacld_proc_access(struct svc_rqst *rqstp)
 {
+	struct nfsd3_accessargs *argp = rqstp->rq_argp;
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
 	__be32 nfserr;
 
 	dprintk("nfsd: ACCESS(2acl)   %s 0x%x\n",
@@ -179,9 +182,10 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg
 /*
  * XDR decode functions
  */
-static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclargs *argp)
+static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+
 	p = nfs2svc_decode_fh(p, &argp->fh);
 	if (!p)
 		return 0;
@@ -191,9 +195,9 @@ static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 
-static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_setaclargs *argp)
+static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
 	struct kvec *head = rqstp->rq_arg.head;
 	unsigned int base;
 	int n;
@@ -217,18 +221,20 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
 	return (n > 0);
 }
 
-static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd_fhandle *argp)
+static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+
 	p = nfs2svc_decode_fh(p, &argp->fh);
 	if (!p)
 		return 0;
 	return xdr_argsize_check(rqstp, p);
 }
 
-static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_accessargs *argp)
+static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessargs *argp = rqstp->rq_argp;
+
 	p = nfs2svc_decode_fh(p, &argp->fh);
 	if (!p)
 		return 0;
@@ -245,15 +251,15 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
  * There must be an encoding function for void results so svc_process
  * will work properly.
  */
-static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* GETACL */
-static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct dentry *dentry = resp->fh.fh_dentry;
 	struct inode *inode;
 	struct kvec *head = rqstp->rq_res.head;
@@ -296,17 +302,19 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 	return (n > 0);
 }
 
-static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd_attrstat *resp)
+static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
 	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* ACCESS */
-static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_accessres *resp)
+static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
+
 	p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	*p++ = htonl(resp->access);
 	return xdr_ressize_check(rqstp, p);
@@ -315,27 +323,27 @@ static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
 /*
  * XDR release functions
  */
-static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static void nfsaclsvc_release_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
 	posix_acl_release(resp->acl_access);
 	posix_acl_release(resp->acl_default);
-	return 1;
 }
 
-static int nfsaclsvc_release_attrstat(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd_attrstat *resp)
+static void nfsaclsvc_release_attrstat(struct svc_rqst *rqstp)
 {
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
-	return 1;
 }
 
-static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p,
-               struct nfsd3_accessres *resp)
+static void nfsaclsvc_release_access(struct svc_rqst *rqstp)
 {
-       fh_put(&resp->fh);
-       return 1;
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
+
+	fh_put(&resp->fh);
 }
 
 #define nfsaclsvc_decode_voidargs	NULL
@@ -345,24 +353,24 @@ static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p,
 #define nfsd3_voidres		nfsd3_voidargs
 struct nfsd3_voidargs { int dummy; };
 
-#define PROC(name, argt, rest, relt, cache, respsize)	\
- { (svc_procfunc) nfsacld_proc_##name,		\
-   (kxdrproc_t) nfsaclsvc_decode_##argt##args,	\
-   (kxdrproc_t) nfsaclsvc_encode_##rest##res,	\
-   (kxdrproc_t) nfsaclsvc_release_##relt,		\
-   sizeof(struct nfsd3_##argt##args),		\
-   sizeof(struct nfsd3_##rest##res),		\
-   0,						\
-   cache,					\
-   respsize,					\
- }
+#define PROC(name, argt, rest, relt, cache, respsize)			\
+{									\
+	.pc_func	= nfsacld_proc_##name,				\
+	.pc_decode	= nfsaclsvc_decode_##argt##args,		\
+	.pc_encode	= nfsaclsvc_encode_##rest##res,			\
+	.pc_release	= nfsaclsvc_release_##relt,	\
+	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
+	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
+	.pc_cachetype	= cache,					\
+	.pc_xdrressize	= respsize,					\
+}
 
 #define ST 1		/* status*/
 #define AT 21		/* attributes */
 #define pAT (1+AT)	/* post attributes - conditional */
 #define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
 
-static struct svc_procedure		nfsd_acl_procedures2[] = {
+static const struct svc_procedure nfsd_acl_procedures2[] = {
   PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
   PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
   PROC(setacl,	setacl,		attrstat,	attrstat, RC_NOCACHE, ST+AT),
@@ -370,10 +378,12 @@ static struct svc_procedure		nfsd_acl_procedures2[] = {
   PROC(access,	access,		access,		access,   RC_NOCACHE, ST+AT+1),
 };
 
-struct svc_version	nfsd_acl_version2 = {
-		.vs_vers	= 2,
-		.vs_nproc	= 5,
-		.vs_proc	= nfsd_acl_procedures2,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+static unsigned int nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)];
+const struct svc_version nfsd_acl_version2 = {
+	.vs_vers	= 2,
+	.vs_nproc	= 5,
+	.vs_proc	= nfsd_acl_procedures2,
+	.vs_count	= nfsd_acl_count2,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };

diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index dcb5f79..0197652 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c

@@ -18,7 +18,7 @@
  * NULL call.
  */
 static __be32
-nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd3_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -26,9 +26,10 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 /*
  * Get the Access and/or Default ACL of a file.
  */
-static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
-		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+static __be32 nfsd3_proc_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct posix_acl *acl;
 	struct inode *inode;
 	svc_fh *fh;
@@ -80,10 +81,10 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
 /*
  * Set the Access and/or Default ACL of a file.
  */
-static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
-		struct nfsd3_setaclargs *argp,
-		struct nfsd3_attrstat *resp)
+static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	struct inode *inode;
 	svc_fh *fh;
 	__be32 nfserr = 0;
@@ -123,9 +124,10 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
 /*
  * XDR decode functions
  */
-static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclargs *args)
+static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclargs *args = rqstp->rq_argp;
+
 	p = nfs3svc_decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -135,9 +137,9 @@ static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 
-static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_setaclargs *args)
+static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_setaclargs *args = rqstp->rq_argp;
 	struct kvec *head = rqstp->rq_arg.head;
 	unsigned int base;
 	int n;
@@ -166,9 +168,9 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
  */
 
 /* GETACL */
-static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct dentry *dentry = resp->fh.fh_dentry;
 
 	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
@@ -211,9 +213,10 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 /* SETACL */
-static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_attrstat *resp)
+static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
 
 	return xdr_ressize_check(rqstp, p);
@@ -222,13 +225,13 @@ static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p,
 /*
  * XDR release functions
  */
-static int nfs3svc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
-		struct nfsd3_getaclres *resp)
+static void nfs3svc_release_getacl(struct svc_rqst *rqstp)
 {
+	struct nfsd3_getaclres *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
 	posix_acl_release(resp->acl_access);
 	posix_acl_release(resp->acl_default);
-	return 1;
 }
 
 #define nfs3svc_decode_voidargs		NULL
@@ -237,34 +240,36 @@ static int nfs3svc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
 #define nfsd3_voidres			nfsd3_voidargs
 struct nfsd3_voidargs { int dummy; };
 
-#define PROC(name, argt, rest, relt, cache, respsize)	\
- { (svc_procfunc) nfsd3_proc_##name,		\
-   (kxdrproc_t) nfs3svc_decode_##argt##args,	\
-   (kxdrproc_t) nfs3svc_encode_##rest##res,	\
-   (kxdrproc_t) nfs3svc_release_##relt,		\
-   sizeof(struct nfsd3_##argt##args),		\
-   sizeof(struct nfsd3_##rest##res),		\
-   0,						\
-   cache,					\
-   respsize,					\
- }
+#define PROC(name, argt, rest, relt, cache, respsize)			\
+{									\
+	.pc_func	= nfsd3_proc_##name,				\
+	.pc_decode	= nfs3svc_decode_##argt##args,			\
+	.pc_encode	= nfs3svc_encode_##rest##res,			\
+	.pc_release	= nfs3svc_release_##relt,			\
+	.pc_argsize	= sizeof(struct nfsd3_##argt##args),		\
+	.pc_ressize	= sizeof(struct nfsd3_##rest##res),		\
+	.pc_cachetype	= cache,					\
+	.pc_xdrressize	= respsize,					\
+}
 
 #define ST 1		/* status*/
 #define AT 21		/* attributes */
 #define pAT (1+AT)	/* post attributes - conditional */
 #define ACL (1+NFS_ACL_MAX_ENTRIES*3)  /* Access Control List */
 
-static struct svc_procedure		nfsd_acl_procedures3[] = {
+static const struct svc_procedure nfsd_acl_procedures3[] = {
   PROC(null,	void,		void,		void,	  RC_NOCACHE, ST),
   PROC(getacl,	getacl,		getacl,		getacl,	  RC_NOCACHE, ST+1+2*(1+ACL)),
   PROC(setacl,	setacl,		setacl,		fhandle,  RC_NOCACHE, ST+pAT),
 };
 
-struct svc_version	nfsd_acl_version3 = {
-		.vs_vers	= 3,
-		.vs_nproc	= 3,
-		.vs_proc	= nfsd_acl_procedures3,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+static unsigned int nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)];
+const struct svc_version nfsd_acl_version3 = {
+	.vs_vers	= 3,
+	.vs_nproc	= 3,
+	.vs_proc	= nfsd_acl_procedures3,
+	.vs_count	= nfsd_acl_count3,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };
 

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 045c908..2cb56a0 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c

@@ -31,7 +31,7 @@ static int	nfs3_ftypes[] = {
  * NULL call.
  */
 static __be32
-nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd3_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -40,9 +40,10 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
  * Get a file's attributes
  */
 static __be32
-nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
-					   struct nfsd3_attrstat *resp)
+nfsd3_proc_getattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: GETATTR(3)  %s\n",
@@ -63,9 +64,10 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
  * Set a file's attributes
  */
 static __be32
-nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
-					   struct nfsd3_attrstat  *resp)
+nfsd3_proc_setattr(struct svc_rqst *rqstp)
 {
+	struct nfsd3_sattrargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: SETATTR(3)  %s\n",
@@ -81,9 +83,10 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
  * Look up a path name component
  */
 static __be32
-nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
-					  struct nfsd3_diropres  *resp)
+nfsd3_proc_lookup(struct svc_rqst *rqstp)
 {
+	struct nfsd3_diropargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LOOKUP(3)   %s %.*s\n",
@@ -105,9 +108,10 @@ nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
  * Check file access
  */
 static __be32
-nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
-					  struct nfsd3_accessres *resp)
+nfsd3_proc_access(struct svc_rqst *rqstp)
 {
+	struct nfsd3_accessargs *argp = rqstp->rq_argp;
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: ACCESS(3)   %s 0x%x\n",
@@ -124,9 +128,10 @@ nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
  * Read a symlink.
  */
 static __be32
-nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp,
-					   struct nfsd3_readlinkres *resp)
+nfsd3_proc_readlink(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readlinkargs *argp = rqstp->rq_argp;
+	struct nfsd3_readlinkres *resp = rqstp->rq_resp;
 	__be32 nfserr;
 
 	dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh));
@@ -142,9 +147,10 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp,
  * Read a portion of a file.
  */
 static __be32
-nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
-				        struct nfsd3_readres  *resp)
+nfsd3_proc_read(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readargs *argp = rqstp->rq_argp;
+	struct nfsd3_readres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	u32	max_blocksize = svc_max_payload(rqstp);
 	unsigned long cnt = min(argp->count, max_blocksize);
@@ -179,9 +185,10 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
  * Write data to a file
  */
 static __be32
-nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
-					 struct nfsd3_writeres  *resp)
+nfsd3_proc_write(struct svc_rqst *rqstp)
 {
+	struct nfsd3_writeargs *argp = rqstp->rq_argp;
+	struct nfsd3_writeres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	unsigned long cnt = argp->len;
 
@@ -206,9 +213,10 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
  * first reports about SunOS compatibility problems start to pour in...
  */
 static __be32
-nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
-					  struct nfsd3_diropres   *resp)
+nfsd3_proc_create(struct svc_rqst *rqstp)
 {
+	struct nfsd3_createargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
 	svc_fh		*dirfhp, *newfhp = NULL;
 	struct iattr	*attr;
 	__be32		nfserr;
@@ -243,9 +251,10 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
  * Make directory. This operation is not idempotent.
  */
 static __be32
-nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
-					 struct nfsd3_diropres   *resp)
+nfsd3_proc_mkdir(struct svc_rqst *rqstp)
 {
+	struct nfsd3_createargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: MKDIR(3)    %s %.*s\n",
@@ -263,9 +272,10 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
 }
 
 static __be32
-nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
-					   struct nfsd3_diropres    *resp)
+nfsd3_proc_symlink(struct svc_rqst *rqstp)
 {
+	struct nfsd3_symlinkargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: SYMLINK(3)  %s %.*s -> %.*s\n",
@@ -284,9 +294,10 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
  * Make socket/fifo/device.
  */
 static __be32
-nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
-					 struct nfsd3_diropres  *resp)
+nfsd3_proc_mknod(struct svc_rqst *rqstp)
 {
+	struct nfsd3_mknodargs *argp = rqstp->rq_argp;
+	struct nfsd3_diropres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	int type;
 	dev_t	rdev = 0;
@@ -321,9 +332,10 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
  * Remove file/fifo/socket etc.
  */
 static __be32
-nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
-					  struct nfsd3_attrstat  *resp)
+nfsd3_proc_remove(struct svc_rqst *rqstp)
 {
+	struct nfsd3_diropargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: REMOVE(3)   %s %.*s\n",
@@ -342,9 +354,10 @@ nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
  * Remove a directory
  */
 static __be32
-nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
-					 struct nfsd3_attrstat  *resp)
+nfsd3_proc_rmdir(struct svc_rqst *rqstp)
 {
+	struct nfsd3_diropargs *argp = rqstp->rq_argp;
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RMDIR(3)    %s %.*s\n",
@@ -359,9 +372,10 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
 }
 
 static __be32
-nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp,
-					  struct nfsd3_renameres  *resp)
+nfsd3_proc_rename(struct svc_rqst *rqstp)
 {
+	struct nfsd3_renameargs *argp = rqstp->rq_argp;
+	struct nfsd3_renameres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RENAME(3)   %s %.*s ->\n",
@@ -381,9 +395,10 @@ nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp,
 }
 
 static __be32
-nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp,
-					struct nfsd3_linkres  *resp)
+nfsd3_proc_link(struct svc_rqst *rqstp)
 {
+	struct nfsd3_linkargs *argp = rqstp->rq_argp;
+	struct nfsd3_linkres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LINK(3)     %s ->\n",
@@ -404,9 +419,10 @@ nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp,
  * Read a portion of a directory.
  */
 static __be32
-nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
-					   struct nfsd3_readdirres  *resp)
+nfsd3_proc_readdir(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readdirargs *argp = rqstp->rq_argp;
+	struct nfsd3_readdirres  *resp = rqstp->rq_resp;
 	__be32		nfserr;
 	int		count;
 
@@ -440,9 +456,10 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
  * For now, we choose to ignore the dircount parameter.
  */
 static __be32
-nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
-					       struct nfsd3_readdirres  *resp)
+nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
 {
+	struct nfsd3_readdirargs *argp = rqstp->rq_argp;
+	struct nfsd3_readdirres  *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	int	count = 0;
 	loff_t	offset;
@@ -507,9 +524,10 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
  * Get file system stats
  */
 static __be32
-nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
-					   struct nfsd3_fsstatres *resp)
+nfsd3_proc_fsstat(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_fsstatres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: FSSTAT(3)   %s\n",
@@ -524,9 +542,10 @@ nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
  * Get file system info
  */
 static __be32
-nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
-					   struct nfsd3_fsinfores *resp)
+nfsd3_proc_fsinfo(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_fsinfores *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	u32	max_blocksize = svc_max_payload(rqstp);
 
@@ -567,9 +586,10 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
  * Get pathconf info for the specified file
  */
 static __be32
-nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
-					     struct nfsd3_pathconfres *resp)
+nfsd3_proc_pathconf(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd3_pathconfres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: PATHCONF(3) %s\n",
@@ -610,9 +630,10 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
  * Commit a file (range) to stable storage.
  */
 static __be32
-nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp,
-					   struct nfsd3_commitres  *resp)
+nfsd3_proc_commit(struct svc_rqst *rqstp)
 {
+	struct nfsd3_commitargs *argp = rqstp->rq_argp;
+	struct nfsd3_commitres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: COMMIT(3)   %s %u@%Lu\n",
@@ -647,233 +668,221 @@ nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp,
 #define nfsd3_voidres			nfsd3_voidargs
 struct nfsd3_voidargs { int dummy; };
 
-#define PROC(name, argt, rest, relt, cache, respsize)	\
- { (svc_procfunc) nfsd3_proc_##name,		\
-   (kxdrproc_t) nfs3svc_decode_##argt##args,	\
-   (kxdrproc_t) nfs3svc_encode_##rest##res,	\
-   (kxdrproc_t) nfs3svc_release_##relt,		\
-   sizeof(struct nfsd3_##argt##args),		\
-   sizeof(struct nfsd3_##rest##res),		\
-   0,						\
-   cache,					\
-   respsize,					\
- }
-
 #define ST 1		/* status*/
 #define FH 17		/* filehandle with length */
 #define AT 21		/* attributes */
 #define pAT (1+AT)	/* post attributes - conditional */
 #define WC (7+pAT)	/* WCC attributes */
 
-static struct svc_procedure		nfsd_procedures3[22] = {
+static const struct svc_procedure nfsd_procedures3[22] = {
 	[NFS3PROC_NULL] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_null,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_voidres,
+		.pc_func = nfsd3_proc_null,
+		.pc_encode = nfs3svc_encode_voidres,
 		.pc_argsize = sizeof(struct nfsd3_voidargs),
 		.pc_ressize = sizeof(struct nfsd3_voidres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST,
 	},
 	[NFS3PROC_GETATTR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_getattr,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_func = nfsd3_proc_getattr,
+		.pc_decode = nfs3svc_decode_fhandleargs,
+		.pc_encode = nfs3svc_encode_attrstatres,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_attrstatres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+AT,
 	},
 	[NFS3PROC_SETATTR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_setattr,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_func = nfsd3_proc_setattr,
+		.pc_decode = nfs3svc_decode_sattrargs,
+		.pc_encode = nfs3svc_encode_wccstatres,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_sattrargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+WC,
 	},
 	[NFS3PROC_LOOKUP] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_lookup,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_func = nfsd3_proc_lookup,
+		.pc_decode = nfs3svc_decode_diropargs,
+		.pc_encode = nfs3svc_encode_diropres,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_diropres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+FH+pAT+pAT,
 	},
 	[NFS3PROC_ACCESS] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_access,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_accessres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_func = nfsd3_proc_access,
+		.pc_decode = nfs3svc_decode_accessargs,
+		.pc_encode = nfs3svc_encode_accessres,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_accessargs),
 		.pc_ressize = sizeof(struct nfsd3_accessres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+pAT+1,
 	},
 	[NFS3PROC_READLINK] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_readlink,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_func = nfsd3_proc_readlink,
+		.pc_decode = nfs3svc_decode_readlinkargs,
+		.pc_encode = nfs3svc_encode_readlinkres,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_readlinkres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4,
 	},
 	[NFS3PROC_READ] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_read,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_func = nfsd3_proc_read,
+		.pc_decode = nfs3svc_decode_readargs,
+		.pc_encode = nfs3svc_encode_readres,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readargs),
 		.pc_ressize = sizeof(struct nfsd3_readres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4,
 	},
 	[NFS3PROC_WRITE] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_write,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_writeres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_func = nfsd3_proc_write,
+		.pc_decode = nfs3svc_decode_writeargs,
+		.pc_encode = nfs3svc_encode_writeres,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_writeargs),
 		.pc_ressize = sizeof(struct nfsd3_writeres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+WC+4,
 	},
 	[NFS3PROC_CREATE] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_create,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_createargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_func = nfsd3_proc_create,
+		.pc_decode = nfs3svc_decode_createargs,
+		.pc_encode = nfs3svc_encode_createres,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_createargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_MKDIR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_mkdir,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_func = nfsd3_proc_mkdir,
+		.pc_decode = nfs3svc_decode_mkdirargs,
+		.pc_encode = nfs3svc_encode_createres,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mkdirargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_SYMLINK] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_symlink,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_func = nfsd3_proc_symlink,
+		.pc_decode = nfs3svc_decode_symlinkargs,
+		.pc_encode = nfs3svc_encode_createres,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_MKNOD] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_mknod,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_createres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_func = nfsd3_proc_mknod,
+		.pc_decode = nfs3svc_decode_mknodargs,
+		.pc_encode = nfs3svc_encode_createres,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_mknodargs),
 		.pc_ressize = sizeof(struct nfsd3_createres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+(1+FH+pAT)+WC,
 	},
 	[NFS3PROC_REMOVE] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_remove,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_func = nfsd3_proc_remove,
+		.pc_decode = nfs3svc_decode_diropargs,
+		.pc_encode = nfs3svc_encode_wccstatres,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+WC,
 	},
 	[NFS3PROC_RMDIR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_rmdir,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_func = nfsd3_proc_rmdir,
+		.pc_decode = nfs3svc_decode_diropargs,
+		.pc_encode = nfs3svc_encode_wccstatres,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_diropargs),
 		.pc_ressize = sizeof(struct nfsd3_wccstatres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+WC,
 	},
 	[NFS3PROC_RENAME] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_rename,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_renameres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_func = nfsd3_proc_rename,
+		.pc_decode = nfs3svc_decode_renameargs,
+		.pc_encode = nfs3svc_encode_renameres,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_renameargs),
 		.pc_ressize = sizeof(struct nfsd3_renameres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+WC+WC,
 	},
 	[NFS3PROC_LINK] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_link,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_linkres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2,
+		.pc_func = nfsd3_proc_link,
+		.pc_decode = nfs3svc_decode_linkargs,
+		.pc_encode = nfs3svc_encode_linkres,
+		.pc_release = nfs3svc_release_fhandle2,
 		.pc_argsize = sizeof(struct nfsd3_linkargs),
 		.pc_ressize = sizeof(struct nfsd3_linkres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+pAT+WC,
 	},
 	[NFS3PROC_READDIR] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_readdir,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_func = nfsd3_proc_readdir,
+		.pc_decode = nfs3svc_decode_readdirargs,
+		.pc_encode = nfs3svc_encode_readdirres,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
 		.pc_cachetype = RC_NOCACHE,
 	},
 	[NFS3PROC_READDIRPLUS] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_readdirplus,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_func = nfsd3_proc_readdirplus,
+		.pc_decode = nfs3svc_decode_readdirplusargs,
+		.pc_encode = nfs3svc_encode_readdirres,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
 		.pc_ressize = sizeof(struct nfsd3_readdirres),
 		.pc_cachetype = RC_NOCACHE,
 	},
 	[NFS3PROC_FSSTAT] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_fsstat,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres,
+		.pc_func = nfsd3_proc_fsstat,
+		.pc_decode = nfs3svc_decode_fhandleargs,
+		.pc_encode = nfs3svc_encode_fsstatres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsstatres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+pAT+2*6+1,
 	},
 	[NFS3PROC_FSINFO] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_fsinfo,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores,
+		.pc_func = nfsd3_proc_fsinfo,
+		.pc_decode = nfs3svc_decode_fhandleargs,
+		.pc_encode = nfs3svc_encode_fsinfores,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_fsinfores),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+pAT+12,
 	},
 	[NFS3PROC_PATHCONF] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_pathconf,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres,
+		.pc_func = nfsd3_proc_pathconf,
+		.pc_decode = nfs3svc_decode_fhandleargs,
+		.pc_encode = nfs3svc_encode_pathconfres,
 		.pc_argsize = sizeof(struct nfsd3_fhandleargs),
 		.pc_ressize = sizeof(struct nfsd3_pathconfres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+pAT+6,
 	},
 	[NFS3PROC_COMMIT] = {
-		.pc_func = (svc_procfunc) nfsd3_proc_commit,
-		.pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs,
-		.pc_encode = (kxdrproc_t) nfs3svc_encode_commitres,
-		.pc_release = (kxdrproc_t) nfs3svc_release_fhandle,
+		.pc_func = nfsd3_proc_commit,
+		.pc_decode = nfs3svc_decode_commitargs,
+		.pc_encode = nfs3svc_encode_commitres,
+		.pc_release = nfs3svc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd3_commitargs),
 		.pc_ressize = sizeof(struct nfsd3_commitres),
 		.pc_cachetype = RC_NOCACHE,
@@ -881,10 +890,12 @@ static struct svc_procedure		nfsd_procedures3[22] = {
 	},
 };
 
-struct svc_version	nfsd_version3 = {
-		.vs_vers	= 3,
-		.vs_nproc	= 22,
-		.vs_proc	= nfsd_procedures3,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS3_SVC_XDRSIZE,
+static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures3)];
+const struct svc_version nfsd_version3 = {
+	.vs_vers	= 3,
+	.vs_nproc	= 22,
+	.vs_proc	= nfsd_procedures3,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_count	= nfsd_count3,
+	.vs_xdrsize	= NFS3_SVC_XDRSIZE,
 };

diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 4523346..bf444b6 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c

@@ -260,7 +260,7 @@ void fill_post_wcc(struct svc_fh *fhp)
 		printk("nfsd: inode locked twice during operation.\n");
 
 	err = fh_getattr(fhp, &fhp->fh_post_attr);
-	fhp->fh_post_change = d_inode(fhp->fh_dentry)->i_version;
+	fhp->fh_post_change = nfsd4_change_attribute(d_inode(fhp->fh_dentry));
 	if (err) {
 		fhp->fh_post_saved = false;
 		/* Grab the ctime anyway - set_change_info might use it */
@@ -273,8 +273,10 @@ void fill_post_wcc(struct svc_fh *fhp)
  * XDR decode functions
  */
 int
-nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
+nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_fhandle *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -282,9 +284,10 @@ nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *a
 }
 
 int
-nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_sattrargs *args)
+nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_sattrargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -300,9 +303,10 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_diropargs *args)
+nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_diropargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -311,9 +315,10 @@ nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_accessargs *args)
+nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -323,9 +328,9 @@ nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readargs *args)
+nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readargs *args = rqstp->rq_argp;
 	unsigned int len;
 	int v;
 	u32 max_blocksize = svc_max_payload(rqstp);
@@ -353,9 +358,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_writeargs *args)
+nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_writeargs *args = rqstp->rq_argp;
 	unsigned int len, v, hdr, dlen;
 	u32 max_blocksize = svc_max_payload(rqstp);
 	struct kvec *head = rqstp->rq_arg.head;
@@ -413,9 +418,10 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_createargs *args)
+nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_createargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -435,10 +441,12 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
 
 	return xdr_argsize_check(rqstp, p);
 }
+
 int
-nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_createargs *args)
+nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_createargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh)) ||
 	    !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -448,9 +456,9 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_symlinkargs *args)
+nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_symlinkargs *args = rqstp->rq_argp;
 	unsigned int len, avail;
 	char *old, *new;
 	struct kvec *vec;
@@ -500,9 +508,10 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_mknodargs *args)
+nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_mknodargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -522,9 +531,10 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_renameargs *args)
+nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_renameargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_filename(p, &args->fname, &args->flen))
 	 || !(p = decode_fh(p, &args->tfh))
@@ -535,9 +545,10 @@ nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readlinkargs *args)
+nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readlinkargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -547,9 +558,10 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_linkargs *args)
+nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_linkargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_fh(p, &args->tfh))
 	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
@@ -559,9 +571,9 @@ nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readdirargs *args)
+nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readdirargs *args = rqstp->rq_argp;
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -576,9 +588,9 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readdirargs *args)
+nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readdirargs *args = rqstp->rq_argp;
 	int len;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
@@ -602,9 +614,9 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_commitargs *args)
+nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_commitargs *args = rqstp->rq_argp;
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -622,16 +634,17 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p,
  * will work properly.
  */
 int
-nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* GETATTR */
 int
-nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_attrstat *resp)
+nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	if (resp->status == 0) {
 		lease_get_mtime(d_inode(resp->fh.fh_dentry),
 				&resp->stat.mtime);
@@ -642,18 +655,20 @@ nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
 
 /* SETATTR, REMOVE, RMDIR */
 int
-nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_attrstat *resp)
+nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	p = encode_wcc_data(rqstp, p, &resp->fh);
 	return xdr_ressize_check(rqstp, p);
 }
 
 /* LOOKUP */
 int
-nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_diropres *resp)
+nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
+
 	if (resp->status == 0) {
 		p = encode_fh(p, &resp->fh);
 		p = encode_post_op_attr(rqstp, p, &resp->fh);
@@ -664,9 +679,10 @@ nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
 
 /* ACCESS */
 int
-nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_accessres *resp)
+nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_accessres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	if (resp->status == 0)
 		*p++ = htonl(resp->access);
@@ -675,9 +691,10 @@ nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
 
 /* READLINK */
 int
-nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readlinkres *resp)
+nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readlinkres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	if (resp->status == 0) {
 		*p++ = htonl(resp->len);
@@ -696,9 +713,10 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
 
 /* READ */
 int
-nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readres *resp)
+nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	if (resp->status == 0) {
 		*p++ = htonl(resp->count);
@@ -720,9 +738,9 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
 
 /* WRITE */
 int
-nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_writeres *resp)
+nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_writeres *resp = rqstp->rq_resp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	p = encode_wcc_data(rqstp, p, &resp->fh);
@@ -737,9 +755,10 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p,
 
 /* CREATE, MKDIR, SYMLINK, MKNOD */
 int
-nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_diropres *resp)
+nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_diropres *resp = rqstp->rq_resp;
+
 	if (resp->status == 0) {
 		*p++ = xdr_one;
 		p = encode_fh(p, &resp->fh);
@@ -751,9 +770,10 @@ nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p,
 
 /* RENAME */
 int
-nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_renameres *resp)
+nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_renameres *resp = rqstp->rq_resp;
+
 	p = encode_wcc_data(rqstp, p, &resp->ffh);
 	p = encode_wcc_data(rqstp, p, &resp->tfh);
 	return xdr_ressize_check(rqstp, p);
@@ -761,9 +781,10 @@ nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p,
 
 /* LINK */
 int
-nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_linkres *resp)
+nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_linkres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 	p = encode_wcc_data(rqstp, p, &resp->tfh);
 	return xdr_ressize_check(rqstp, p);
@@ -771,9 +792,10 @@ nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p,
 
 /* READDIR */
 int
-nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_readdirres *resp)
+nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_readdirres *resp = rqstp->rq_resp;
+
 	p = encode_post_op_attr(rqstp, p, &resp->fh);
 
 	if (resp->status == 0) {
@@ -1021,9 +1043,9 @@ nfs3svc_encode_entry_plus(void *cd, const char *name,
 
 /* FSSTAT */
 int
-nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_fsstatres *resp)
+nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_fsstatres *resp = rqstp->rq_resp;
 	struct kstatfs	*s = &resp->stats;
 	u64		bs = s->f_bsize;
 
@@ -1043,9 +1065,10 @@ nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p,
 
 /* FSINFO */
 int
-nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_fsinfores *resp)
+nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_fsinfores *resp = rqstp->rq_resp;
+
 	*p++ = xdr_zero;	/* no post_op_attr */
 
 	if (resp->status == 0) {
@@ -1067,9 +1090,10 @@ nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p,
 
 /* PATHCONF */
 int
-nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_pathconfres *resp)
+nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_pathconfres *resp = rqstp->rq_resp;
+
 	*p++ = xdr_zero;	/* no post_op_attr */
 
 	if (resp->status == 0) {
@@ -1086,9 +1110,9 @@ nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p,
 
 /* COMMIT */
 int
-nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_commitres *resp)
+nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd3_commitres *resp = rqstp->rq_resp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	p = encode_wcc_data(rqstp, p, &resp->fh);
@@ -1103,19 +1127,19 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p,
 /*
  * XDR release functions
  */
-int
-nfs3svc_release_fhandle(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_attrstat *resp)
+void
+nfs3svc_release_fhandle(struct svc_rqst *rqstp)
 {
+	struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
-	return 1;
 }
 
-int
-nfs3svc_release_fhandle2(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd3_fhandle_pair *resp)
+void
+nfs3svc_release_fhandle2(struct svc_rqst *rqstp)
 {
+	struct nfsd3_fhandle_pair *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh1);
 	fh_put(&resp->fh2);
-	return 1;
 }

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 0274db6..b45083c 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c

@@ -468,7 +468,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
  * NB: Without this zero space reservation, callbacks over krb5p fail
  */
 static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 void *__unused)
+				 const void *__unused)
 {
 	xdr_reserve_space(xdr, 0);
 }
@@ -477,8 +477,9 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
  * 20.2. Operation 4: CB_RECALL - Recall a Delegation
  */
 static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
-				   const struct nfsd4_callback *cb)
+				   const void *data)
 {
+	const struct nfsd4_callback *cb = data;
 	const struct nfs4_delegation *dp = cb_to_delegation(cb);
 	struct nfs4_cb_compound_hdr hdr = {
 		.ident = cb->cb_clp->cl_cb_ident,
@@ -512,8 +513,9 @@ static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
  */
 static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
 				  struct xdr_stream *xdr,
-				  struct nfsd4_callback *cb)
+				  void *data)
 {
+	struct nfsd4_callback *cb = data;
 	struct nfs4_cb_compound_hdr hdr;
 	int status;
 
@@ -585,8 +587,9 @@ static void encode_cb_layout4args(struct xdr_stream *xdr,
 
 static void nfs4_xdr_enc_cb_layout(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfsd4_callback *cb)
+				   const void *data)
 {
+	const struct nfsd4_callback *cb = data;
 	const struct nfs4_layout_stateid *ls =
 		container_of(cb, struct nfs4_layout_stateid, ls_recall);
 	struct nfs4_cb_compound_hdr hdr = {
@@ -602,8 +605,9 @@ static void nfs4_xdr_enc_cb_layout(struct rpc_rqst *req,
 
 static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
 				  struct xdr_stream *xdr,
-				  struct nfsd4_callback *cb)
+				  void *data)
 {
+	struct nfsd4_callback *cb = data;
 	struct nfs4_cb_compound_hdr hdr;
 	int status;
 
@@ -631,8 +635,9 @@ static void encode_stateowner(struct xdr_stream *xdr, struct nfs4_stateowner *so
 
 static void nfs4_xdr_enc_cb_notify_lock(struct rpc_rqst *req,
 					struct xdr_stream *xdr,
-					const struct nfsd4_callback *cb)
+					const void *data)
 {
+	const struct nfsd4_callback *cb = data;
 	const struct nfsd4_blocked_lock *nbl =
 		container_of(cb, struct nfsd4_blocked_lock, nbl_cb);
 	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)nbl->nbl_lock.fl_owner;
@@ -659,8 +664,9 @@ static void nfs4_xdr_enc_cb_notify_lock(struct rpc_rqst *req,
 
 static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
 					struct xdr_stream *xdr,
-					struct nfsd4_callback *cb)
+					void *data)
 {
+	struct nfsd4_callback *cb = data;
 	struct nfs4_cb_compound_hdr hdr;
 	int status;
 
@@ -682,15 +688,15 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
 #define PROC(proc, call, argtype, restype)				\
 [NFSPROC4_CLNT_##proc] = {						\
 	.p_proc    = NFSPROC4_CB_##call,				\
-	.p_encode  = (kxdreproc_t)nfs4_xdr_enc_##argtype,		\
-	.p_decode  = (kxdrdproc_t)nfs4_xdr_dec_##restype,		\
+	.p_encode  = nfs4_xdr_enc_##argtype,		\
+	.p_decode  = nfs4_xdr_dec_##restype,				\
 	.p_arglen  = NFS4_enc_##argtype##_sz,				\
 	.p_replen  = NFS4_dec_##restype##_sz,				\
 	.p_statidx = NFSPROC4_CB_##call,				\
 	.p_name    = #proc,						\
 }
 
-static struct rpc_procinfo nfs4_cb_procedures[] = {
+static const struct rpc_procinfo nfs4_cb_procedures[] = {
 	PROC(CB_NULL,	NULL,		cb_null,	cb_null),
 	PROC(CB_RECALL,	COMPOUND,	cb_recall,	cb_recall),
 #ifdef CONFIG_NFSD_PNFS
@@ -699,7 +705,8 @@ static struct rpc_procinfo nfs4_cb_procedures[] = {
 	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
 };
 
-static struct rpc_version nfs_cb_version4 = {
+static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
+static const struct rpc_version nfs_cb_version4 = {
 /*
  * Note on the callback rpc program version number: despite language in rfc
  * 5661 section 18.36.3 requiring servers to use 4 in this field, the
@@ -709,7 +716,8 @@ static struct rpc_version nfs_cb_version4 = {
  */
 	.number			= 1,
 	.nrprocs		= ARRAY_SIZE(nfs4_cb_procedures),
-	.procs			= nfs4_cb_procedures
+	.procs			= nfs4_cb_procedures,
+	.counts			= nfs4_cb_counts,
 };
 
 static const struct rpc_version *nfs_cb_version[] = {

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index dadb3bf..d27e75a 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c

@@ -344,8 +344,9 @@ copy_clientid(clientid_t *clid, struct nfsd4_session *session)
 
 static __be32
 nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	   struct nfsd4_open *open)
+	   union nfsd4_op_u *u)
 {
+	struct nfsd4_open *open = &u->open;
 	__be32 status;
 	struct svc_fh *resfh = NULL;
 	struct net *net = SVC_NET(rqstp);
@@ -467,14 +468,14 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
  */
 static __be32 nfsd4_open_omfg(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_op *op)
 {
-	struct nfsd4_open *open = (struct nfsd4_open *)&op->u;
+	struct nfsd4_open *open = &op->u.open;
 
 	if (!seqid_mutating_err(ntohl(op->status)))
 		return op->status;
 	if (nfsd4_has_session(cstate))
 		return op->status;
 	open->op_xdr_error = op->status;
-	return nfsd4_open(rqstp, cstate, open);
+	return nfsd4_open(rqstp, cstate, &op->u);
 }
 
 /*
@@ -482,19 +483,21 @@ static __be32 nfsd4_open_omfg(struct svc_rqst *rqstp, struct nfsd4_compound_stat
  */
 static __be32
 nfsd4_getfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct svc_fh **getfh)
+	    union nfsd4_op_u *u)
 {
 	if (!cstate->current_fh.fh_dentry)
 		return nfserr_nofilehandle;
 
-	*getfh = &cstate->current_fh;
+	u->getfh = &cstate->current_fh;
 	return nfs_ok;
 }
 
 static __be32
 nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_putfh *putfh)
+	    union nfsd4_op_u *u)
 {
+	struct nfsd4_putfh *putfh = &u->putfh;
+
 	fh_put(&cstate->current_fh);
 	cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
 	memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
@@ -504,7 +507,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		void *arg)
+		union nfsd4_op_u *u)
 {
 	__be32 status;
 
@@ -515,7 +518,7 @@ nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		void *arg)
+		union nfsd4_op_u *u)
 {
 	if (!cstate->save_fh.fh_dentry)
 		return nfserr_restorefh;
@@ -530,7 +533,7 @@ nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     void *arg)
+	     union nfsd4_op_u *u)
 {
 	if (!cstate->current_fh.fh_dentry)
 		return nfserr_nofilehandle;
@@ -548,8 +551,10 @@ nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
  */
 static __be32
 nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_access *access)
+	     union nfsd4_op_u *u)
 {
+	struct nfsd4_access *access = &u->access;
+
 	if (access->ac_req_access & ~NFS3_ACCESS_FULL)
 		return nfserr_inval;
 
@@ -574,8 +579,10 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
 
 static __be32
 nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_commit *commit)
+	     union nfsd4_op_u *u)
 {
+	struct nfsd4_commit *commit = &u->commit;
+
 	gen_boot_verifier(&commit->co_verf, SVC_NET(rqstp));
 	return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
 			     commit->co_count);
@@ -583,8 +590,9 @@ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_create *create)
+	     union nfsd4_op_u *u)
 {
+	struct nfsd4_create *create = &u->create;
 	struct svc_fh resfh;
 	__be32 status;
 	dev_t rdev;
@@ -670,8 +678,9 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_getattr *getattr)
+	      union nfsd4_op_u *u)
 {
+	struct nfsd4_getattr *getattr = &u->getattr;
 	__be32 status;
 
 	status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
@@ -691,8 +700,9 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	   struct nfsd4_link *link)
+	   union nfsd4_op_u *u)
 {
+	struct nfsd4_link *link = &u->link;
 	__be32 status = nfserr_nofilehandle;
 
 	if (!cstate->save_fh.fh_dentry)
@@ -723,24 +733,25 @@ static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh)
 
 static __be32
 nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      void *arg)
+	      union nfsd4_op_u *u)
 {
 	return nfsd4_do_lookupp(rqstp, &cstate->current_fh);
 }
 
 static __be32
 nfsd4_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_lookup *lookup)
+	     union nfsd4_op_u *u)
 {
 	return nfsd_lookup(rqstp, &cstate->current_fh,
-			   lookup->lo_name, lookup->lo_len,
+			   u->lookup.lo_name, u->lookup.lo_len,
 			   &cstate->current_fh);
 }
 
 static __be32
 nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	   struct nfsd4_read *read)
+	   union nfsd4_op_u *u)
 {
+	struct nfsd4_read *read = &u->read;
 	__be32 status;
 
 	read->rd_filp = NULL;
@@ -775,8 +786,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_readdir *readdir)
+	      union nfsd4_op_u *u)
 {
+	struct nfsd4_readdir *readdir = &u->readdir;
 	u64 cookie = readdir->rd_cookie;
 	static const nfs4_verifier zeroverf;
 
@@ -800,17 +812,18 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_readlink(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	       struct nfsd4_readlink *readlink)
+	       union nfsd4_op_u *u)
 {
-	readlink->rl_rqstp = rqstp;
-	readlink->rl_fhp = &cstate->current_fh;
+	u->readlink.rl_rqstp = rqstp;
+	u->readlink.rl_fhp = &cstate->current_fh;
 	return nfs_ok;
 }
 
 static __be32
 nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_remove *remove)
+	     union nfsd4_op_u *u)
 {
+	struct nfsd4_remove *remove = &u->remove;
 	__be32 status;
 
 	if (opens_in_grace(SVC_NET(rqstp)))
@@ -826,8 +839,9 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_rename *rename)
+	     union nfsd4_op_u *u)
 {
+	struct nfsd4_rename *rename = &u->rename;
 	__be32 status = nfserr_nofilehandle;
 
 	if (!cstate->save_fh.fh_dentry)
@@ -847,8 +861,9 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_secinfo *secinfo)
+	      union nfsd4_op_u *u)
 {
+	struct nfsd4_secinfo *secinfo = &u->secinfo;
 	struct svc_export *exp;
 	struct dentry *dentry;
 	__be32 err;
@@ -876,11 +891,11 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_secinfo_no_name *sin)
+		union nfsd4_op_u *u)
 {
 	__be32 err;
 
-	switch (sin->sin_style) {
+	switch (u->secinfo_no_name.sin_style) {
 	case NFS4_SECINFO_STYLE4_CURRENT_FH:
 		break;
 	case NFS4_SECINFO_STYLE4_PARENT:
@@ -892,15 +907,16 @@ nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstat
 		return nfserr_inval;
 	}
 
-	sin->sin_exp = exp_get(cstate->current_fh.fh_export);
+	u->secinfo_no_name.sin_exp = exp_get(cstate->current_fh.fh_export);
 	fh_put(&cstate->current_fh);
 	return nfs_ok;
 }
 
 static __be32
 nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_setattr *setattr)
+	      union nfsd4_op_u *u)
 {
+	struct nfsd4_setattr *setattr = &u->setattr;
 	__be32 status = nfs_ok;
 	int err;
 
@@ -960,8 +976,9 @@ static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
 
 static __be32
 nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_write *write)
+	    union nfsd4_op_u *u)
 {
+	struct nfsd4_write *write = &u->write;
 	stateid_t *stateid = &write->wr_stateid;
 	struct file *filp = NULL;
 	__be32 status = nfs_ok;
@@ -1034,8 +1051,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		struct nfsd4_clone *clone)
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_clone *clone = &u->clone;
 	struct file *src, *dst;
 	__be32 status;
 
@@ -1055,8 +1073,9 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		struct nfsd4_copy *copy)
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_copy *copy = &u->copy;
 	struct file *src, *dst;
 	__be32 status;
 	ssize_t bytes;
@@ -1111,23 +1130,24 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_allocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	       struct nfsd4_fallocate *fallocate)
+	       union nfsd4_op_u *u)
 {
-	return nfsd4_fallocate(rqstp, cstate, fallocate, 0);
+	return nfsd4_fallocate(rqstp, cstate, &u->allocate, 0);
 }
 
 static __be32
 nfsd4_deallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		 struct nfsd4_fallocate *fallocate)
+		 union nfsd4_op_u *u)
 {
-	return nfsd4_fallocate(rqstp, cstate, fallocate,
+	return nfsd4_fallocate(rqstp, cstate, &u->deallocate,
 			       FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE);
 }
 
 static __be32
 nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		struct nfsd4_seek *seek)
+	   union nfsd4_op_u *u)
 {
+	struct nfsd4_seek *seek = &u->seek;
 	int whence;
 	__be32 status;
 	struct file *file;
@@ -1232,21 +1252,21 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 static __be32
 nfsd4_nverify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	      struct nfsd4_verify *verify)
+	      union nfsd4_op_u *u)
 {
 	__be32 status;
 
-	status = _nfsd4_verify(rqstp, cstate, verify);
+	status = _nfsd4_verify(rqstp, cstate, &u->verify);
 	return status == nfserr_not_same ? nfs_ok : status;
 }
 
 static __be32
 nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	     struct nfsd4_verify *verify)
+	     union nfsd4_op_u *u)
 {
 	__be32 status;
 
-	status = _nfsd4_verify(rqstp, cstate, verify);
+	status = _nfsd4_verify(rqstp, cstate, &u->nverify);
 	return status == nfserr_same ? nfs_ok : status;
 }
 
@@ -1271,9 +1291,9 @@ nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
 
 static __be32
 nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *cstate,
-		struct nfsd4_getdeviceinfo *gdp)
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_getdeviceinfo *gdp = &u->getdeviceinfo;
 	const struct nfsd4_layout_ops *ops;
 	struct nfsd4_deviceid_map *map;
 	struct svc_export *exp;
@@ -1317,9 +1337,9 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
 
 static __be32
 nfsd4_layoutget(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *cstate,
-		struct nfsd4_layoutget *lgp)
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_layoutget *lgp = &u->layoutget;
 	struct svc_fh *current_fh = &cstate->current_fh;
 	const struct nfsd4_layout_ops *ops;
 	struct nfs4_layout_stateid *ls;
@@ -1397,9 +1417,9 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
 
 static __be32
 nfsd4_layoutcommit(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *cstate,
-		struct nfsd4_layoutcommit *lcp)
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_layoutcommit *lcp = &u->layoutcommit;
 	const struct nfsd4_layout_seg *seg = &lcp->lc_seg;
 	struct svc_fh *current_fh = &cstate->current_fh;
 	const struct nfsd4_layout_ops *ops;
@@ -1461,9 +1481,9 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
 
 static __be32
 nfsd4_layoutreturn(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *cstate,
-		struct nfsd4_layoutreturn *lrp)
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_layoutreturn *lrp = &u->layoutreturn;
 	struct svc_fh *current_fh = &cstate->current_fh;
 	__be32 nfserr;
 
@@ -1510,7 +1530,7 @@ nfsd4_layoutreturn(struct svc_rqst *rqstp,
  * NULL call.
  */
 static __be32
-nfsd4_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd4_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -1521,12 +1541,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
 		nfsdstats.nfs4_opcount[opnum]++;
 }
 
-typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
-			      void *);
-typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
-typedef void(*stateid_setter)(struct nfsd4_compound_state *, void *);
-typedef void(*stateid_getter)(struct nfsd4_compound_state *, void *);
-
 enum nfsd4_op_flags {
 	ALLOWED_WITHOUT_FH = 1 << 0,	/* No current filehandle required */
 	ALLOWED_ON_ABSENT_FS = 1 << 1,	/* ops processed on absent fs */
@@ -1558,16 +1572,19 @@ enum nfsd4_op_flags {
 };
 
 struct nfsd4_operation {
-	nfsd4op_func op_func;
+	__be32 (*op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
+			union nfsd4_op_u *);
 	u32 op_flags;
 	char *op_name;
 	/* Try to get response size before operation */
-	nfsd4op_rsize op_rsize_bop;
-	stateid_getter op_get_currentstateid;
-	stateid_setter op_set_currentstateid;
+	u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *);
+	void (*op_get_currentstateid)(struct nfsd4_compound_state *,
+			union nfsd4_op_u *);
+	void (*op_set_currentstateid)(struct nfsd4_compound_state *,
+			union nfsd4_op_u *);
 };
 
-static struct nfsd4_operation nfsd4_ops[];
+static const struct nfsd4_operation nfsd4_ops[];
 
 static const char *nfsd4_op_name(unsigned opnum);
 
@@ -1604,7 +1621,7 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
 	return nfs_ok;
 }
 
-static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
+static inline const struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
 {
 	return &nfsd4_ops[op->opnum];
 }
@@ -1622,10 +1639,9 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp)
 	struct nfsd4_compoundargs *argp = rqstp->rq_argp;
 	struct nfsd4_op *this = &argp->ops[resp->opcnt - 1];
 	struct nfsd4_op *next = &argp->ops[resp->opcnt];
-	struct nfsd4_operation *thisd;
-	struct nfsd4_operation *nextd;
+	const struct nfsd4_operation *thisd = OPDESC(this);
+	const struct nfsd4_operation *nextd;
 
-	thisd = OPDESC(this);
 	/*
 	 * Most ops check wronsec on our own; only the putfh-like ops
 	 * have special rules.
@@ -1673,12 +1689,12 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
  * COMPOUND call.
  */
 static __be32
-nfsd4_proc_compound(struct svc_rqst *rqstp,
-		    struct nfsd4_compoundargs *args,
-		    struct nfsd4_compoundres *resp)
+nfsd4_proc_compound(struct svc_rqst *rqstp)
 {
+	struct nfsd4_compoundargs *args = rqstp->rq_argp;
+	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct nfsd4_op	*op;
-	struct nfsd4_operation *opdesc;
+	const struct nfsd4_operation *opdesc;
 	struct nfsd4_compound_state *cstate = &resp->cstate;
 	struct svc_fh *current_fh = &cstate->current_fh;
 	struct svc_fh *save_fh = &cstate->save_fh;
@@ -2091,360 +2107,360 @@ static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 	return (op_encode_hdr_size + 3) * sizeof(__be32);
 }
 
-static struct nfsd4_operation nfsd4_ops[] = {
+static const struct nfsd4_operation nfsd4_ops[] = {
 	[OP_ACCESS] = {
-		.op_func = (nfsd4op_func)nfsd4_access,
+		.op_func = nfsd4_access,
 		.op_name = "OP_ACCESS",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_access_rsize,
+		.op_rsize_bop = nfsd4_access_rsize,
 	},
 	[OP_CLOSE] = {
-		.op_func = (nfsd4op_func)nfsd4_close,
+		.op_func = nfsd4_close,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_CLOSE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_closestateid,
-		.op_set_currentstateid = (stateid_setter)nfsd4_set_closestateid,
+		.op_rsize_bop = nfsd4_status_stateid_rsize,
+		.op_get_currentstateid = nfsd4_get_closestateid,
+		.op_set_currentstateid = nfsd4_set_closestateid,
 	},
 	[OP_COMMIT] = {
-		.op_func = (nfsd4op_func)nfsd4_commit,
+		.op_func = nfsd4_commit,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_COMMIT",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_commit_rsize,
+		.op_rsize_bop = nfsd4_commit_rsize,
 	},
 	[OP_CREATE] = {
-		.op_func = (nfsd4op_func)nfsd4_create,
+		.op_func = nfsd4_create,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME | OP_CLEAR_STATEID,
 		.op_name = "OP_CREATE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_create_rsize,
+		.op_rsize_bop = nfsd4_create_rsize,
 	},
 	[OP_DELEGRETURN] = {
-		.op_func = (nfsd4op_func)nfsd4_delegreturn,
+		.op_func = nfsd4_delegreturn,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_DELEGRETURN",
 		.op_rsize_bop = nfsd4_only_status_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_delegreturnstateid,
+		.op_get_currentstateid = nfsd4_get_delegreturnstateid,
 	},
 	[OP_GETATTR] = {
-		.op_func = (nfsd4op_func)nfsd4_getattr,
+		.op_func = nfsd4_getattr,
 		.op_flags = ALLOWED_ON_ABSENT_FS,
 		.op_rsize_bop = nfsd4_getattr_rsize,
 		.op_name = "OP_GETATTR",
 	},
 	[OP_GETFH] = {
-		.op_func = (nfsd4op_func)nfsd4_getfh,
+		.op_func = nfsd4_getfh,
 		.op_name = "OP_GETFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_getfh_rsize,
+		.op_rsize_bop = nfsd4_getfh_rsize,
 	},
 	[OP_LINK] = {
-		.op_func = (nfsd4op_func)nfsd4_link,
+		.op_func = nfsd4_link,
 		.op_flags = ALLOWED_ON_ABSENT_FS | OP_MODIFIES_SOMETHING
 				| OP_CACHEME,
 		.op_name = "OP_LINK",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_link_rsize,
+		.op_rsize_bop = nfsd4_link_rsize,
 	},
 	[OP_LOCK] = {
-		.op_func = (nfsd4op_func)nfsd4_lock,
+		.op_func = nfsd4_lock,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LOCK",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
-		.op_set_currentstateid = (stateid_setter)nfsd4_set_lockstateid,
+		.op_rsize_bop = nfsd4_lock_rsize,
+		.op_set_currentstateid = nfsd4_set_lockstateid,
 	},
 	[OP_LOCKT] = {
-		.op_func = (nfsd4op_func)nfsd4_lockt,
+		.op_func = nfsd4_lockt,
 		.op_name = "OP_LOCKT",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
+		.op_rsize_bop = nfsd4_lock_rsize,
 	},
 	[OP_LOCKU] = {
-		.op_func = (nfsd4op_func)nfsd4_locku,
+		.op_func = nfsd4_locku,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LOCKU",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_lockustateid,
+		.op_rsize_bop = nfsd4_status_stateid_rsize,
+		.op_get_currentstateid = nfsd4_get_lockustateid,
 	},
 	[OP_LOOKUP] = {
-		.op_func = (nfsd4op_func)nfsd4_lookup,
+		.op_func = nfsd4_lookup,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
 		.op_name = "OP_LOOKUP",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_LOOKUPP] = {
-		.op_func = (nfsd4op_func)nfsd4_lookupp,
+		.op_func = nfsd4_lookupp,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
 		.op_name = "OP_LOOKUPP",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_NVERIFY] = {
-		.op_func = (nfsd4op_func)nfsd4_nverify,
+		.op_func = nfsd4_nverify,
 		.op_name = "OP_NVERIFY",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_OPEN] = {
-		.op_func = (nfsd4op_func)nfsd4_open,
+		.op_func = nfsd4_open,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize,
-		.op_set_currentstateid = (stateid_setter)nfsd4_set_openstateid,
+		.op_rsize_bop = nfsd4_open_rsize,
+		.op_set_currentstateid = nfsd4_set_openstateid,
 	},
 	[OP_OPEN_CONFIRM] = {
-		.op_func = (nfsd4op_func)nfsd4_open_confirm,
+		.op_func = nfsd4_open_confirm,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN_CONFIRM",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
+		.op_rsize_bop = nfsd4_status_stateid_rsize,
 	},
 	[OP_OPEN_DOWNGRADE] = {
-		.op_func = (nfsd4op_func)nfsd4_open_downgrade,
+		.op_func = nfsd4_open_downgrade,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_OPEN_DOWNGRADE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_opendowngradestateid,
-		.op_set_currentstateid = (stateid_setter)nfsd4_set_opendowngradestateid,
+		.op_rsize_bop = nfsd4_status_stateid_rsize,
+		.op_get_currentstateid = nfsd4_get_opendowngradestateid,
+		.op_set_currentstateid = nfsd4_set_opendowngradestateid,
 	},
 	[OP_PUTFH] = {
-		.op_func = (nfsd4op_func)nfsd4_putfh,
+		.op_func = nfsd4_putfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_PUTPUBFH] = {
-		.op_func = (nfsd4op_func)nfsd4_putrootfh,
+		.op_func = nfsd4_putrootfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTPUBFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_PUTROOTFH] = {
-		.op_func = (nfsd4op_func)nfsd4_putrootfh,
+		.op_func = nfsd4_putrootfh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
 		.op_name = "OP_PUTROOTFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_READ] = {
-		.op_func = (nfsd4op_func)nfsd4_read,
+		.op_func = nfsd4_read,
 		.op_name = "OP_READ",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid,
+		.op_rsize_bop = nfsd4_read_rsize,
+		.op_get_currentstateid = nfsd4_get_readstateid,
 	},
 	[OP_READDIR] = {
-		.op_func = (nfsd4op_func)nfsd4_readdir,
+		.op_func = nfsd4_readdir,
 		.op_name = "OP_READDIR",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize,
+		.op_rsize_bop = nfsd4_readdir_rsize,
 	},
 	[OP_READLINK] = {
-		.op_func = (nfsd4op_func)nfsd4_readlink,
+		.op_func = nfsd4_readlink,
 		.op_name = "OP_READLINK",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_readlink_rsize,
+		.op_rsize_bop = nfsd4_readlink_rsize,
 	},
 	[OP_REMOVE] = {
-		.op_func = (nfsd4op_func)nfsd4_remove,
+		.op_func = nfsd4_remove,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_REMOVE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_remove_rsize,
+		.op_rsize_bop = nfsd4_remove_rsize,
 	},
 	[OP_RENAME] = {
-		.op_func = (nfsd4op_func)nfsd4_rename,
+		.op_func = nfsd4_rename,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_RENAME",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_rename_rsize,
+		.op_rsize_bop = nfsd4_rename_rsize,
 	},
 	[OP_RENEW] = {
-		.op_func = (nfsd4op_func)nfsd4_renew,
+		.op_func = nfsd4_renew,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RENEW",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 
 	},
 	[OP_RESTOREFH] = {
-		.op_func = (nfsd4op_func)nfsd4_restorefh,
+		.op_func = nfsd4_restorefh,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RESTOREFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SAVEFH] = {
-		.op_func = (nfsd4op_func)nfsd4_savefh,
+		.op_func = nfsd4_savefh,
 		.op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_SAVEFH",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SECINFO] = {
-		.op_func = (nfsd4op_func)nfsd4_secinfo,
+		.op_func = nfsd4_secinfo,
 		.op_flags = OP_HANDLES_WRONGSEC,
 		.op_name = "OP_SECINFO",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
+		.op_rsize_bop = nfsd4_secinfo_rsize,
 	},
 	[OP_SETATTR] = {
-		.op_func = (nfsd4op_func)nfsd4_setattr,
+		.op_func = nfsd4_setattr,
 		.op_name = "OP_SETATTR",
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_setattrstateid,
+		.op_rsize_bop = nfsd4_setattr_rsize,
+		.op_get_currentstateid = nfsd4_get_setattrstateid,
 	},
 	[OP_SETCLIENTID] = {
-		.op_func = (nfsd4op_func)nfsd4_setclientid,
+		.op_func = nfsd4_setclientid,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_SETCLIENTID",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_setclientid_rsize,
+		.op_rsize_bop = nfsd4_setclientid_rsize,
 	},
 	[OP_SETCLIENTID_CONFIRM] = {
-		.op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
+		.op_func = nfsd4_setclientid_confirm,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_SETCLIENTID_CONFIRM",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_VERIFY] = {
-		.op_func = (nfsd4op_func)nfsd4_verify,
+		.op_func = nfsd4_verify,
 		.op_name = "OP_VERIFY",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_WRITE] = {
-		.op_func = (nfsd4op_func)nfsd4_write,
+		.op_func = nfsd4_write,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_WRITE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_writestateid,
+		.op_rsize_bop = nfsd4_write_rsize,
+		.op_get_currentstateid = nfsd4_get_writestateid,
 	},
 	[OP_RELEASE_LOCKOWNER] = {
-		.op_func = (nfsd4op_func)nfsd4_release_lockowner,
+		.op_func = nfsd4_release_lockowner,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RELEASE_LOCKOWNER",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 
 	/* NFSv4.1 operations */
 	[OP_EXCHANGE_ID] = {
-		.op_func = (nfsd4op_func)nfsd4_exchange_id,
+		.op_func = nfsd4_exchange_id,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_EXCHANGE_ID",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_exchange_id_rsize,
+		.op_rsize_bop = nfsd4_exchange_id_rsize,
 	},
 	[OP_BACKCHANNEL_CTL] = {
-		.op_func = (nfsd4op_func)nfsd4_backchannel_ctl,
+		.op_func = nfsd4_backchannel_ctl,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_BACKCHANNEL_CTL",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_BIND_CONN_TO_SESSION] = {
-		.op_func = (nfsd4op_func)nfsd4_bind_conn_to_session,
+		.op_func = nfsd4_bind_conn_to_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_BIND_CONN_TO_SESSION",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_bind_conn_to_session_rsize,
+		.op_rsize_bop = nfsd4_bind_conn_to_session_rsize,
 	},
 	[OP_CREATE_SESSION] = {
-		.op_func = (nfsd4op_func)nfsd4_create_session,
+		.op_func = nfsd4_create_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_CREATE_SESSION",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_create_session_rsize,
+		.op_rsize_bop = nfsd4_create_session_rsize,
 	},
 	[OP_DESTROY_SESSION] = {
-		.op_func = (nfsd4op_func)nfsd4_destroy_session,
+		.op_func = nfsd4_destroy_session,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_DESTROY_SESSION",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SEQUENCE] = {
-		.op_func = (nfsd4op_func)nfsd4_sequence,
+		.op_func = nfsd4_sequence,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
 		.op_name = "OP_SEQUENCE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_sequence_rsize,
+		.op_rsize_bop = nfsd4_sequence_rsize,
 	},
 	[OP_DESTROY_CLIENTID] = {
-		.op_func = (nfsd4op_func)nfsd4_destroy_clientid,
+		.op_func = nfsd4_destroy_clientid,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
 				| OP_MODIFIES_SOMETHING,
 		.op_name = "OP_DESTROY_CLIENTID",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_RECLAIM_COMPLETE] = {
-		.op_func = (nfsd4op_func)nfsd4_reclaim_complete,
+		.op_func = nfsd4_reclaim_complete,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_RECLAIM_COMPLETE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_SECINFO_NO_NAME] = {
-		.op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
+		.op_func = nfsd4_secinfo_no_name,
 		.op_flags = OP_HANDLES_WRONGSEC,
 		.op_name = "OP_SECINFO_NO_NAME",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
+		.op_rsize_bop = nfsd4_secinfo_rsize,
 	},
 	[OP_TEST_STATEID] = {
-		.op_func = (nfsd4op_func)nfsd4_test_stateid,
+		.op_func = nfsd4_test_stateid,
 		.op_flags = ALLOWED_WITHOUT_FH,
 		.op_name = "OP_TEST_STATEID",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_test_stateid_rsize,
+		.op_rsize_bop = nfsd4_test_stateid_rsize,
 	},
 	[OP_FREE_STATEID] = {
-		.op_func = (nfsd4op_func)nfsd4_free_stateid,
+		.op_func = nfsd4_free_stateid,
 		.op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
 		.op_name = "OP_FREE_STATEID",
-		.op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_get_currentstateid = nfsd4_get_freestateid,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 #ifdef CONFIG_NFSD_PNFS
 	[OP_GETDEVICEINFO] = {
-		.op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
+		.op_func = nfsd4_getdeviceinfo,
 		.op_flags = ALLOWED_WITHOUT_FH,
 		.op_name = "OP_GETDEVICEINFO",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_getdeviceinfo_rsize,
+		.op_rsize_bop = nfsd4_getdeviceinfo_rsize,
 	},
 	[OP_LAYOUTGET] = {
-		.op_func = (nfsd4op_func)nfsd4_layoutget,
+		.op_func = nfsd4_layoutget,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LAYOUTGET",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutget_rsize,
+		.op_rsize_bop = nfsd4_layoutget_rsize,
 	},
 	[OP_LAYOUTCOMMIT] = {
-		.op_func = (nfsd4op_func)nfsd4_layoutcommit,
+		.op_func = nfsd4_layoutcommit,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LAYOUTCOMMIT",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutcommit_rsize,
+		.op_rsize_bop = nfsd4_layoutcommit_rsize,
 	},
 	[OP_LAYOUTRETURN] = {
-		.op_func = (nfsd4op_func)nfsd4_layoutreturn,
+		.op_func = nfsd4_layoutreturn,
 		.op_flags = OP_MODIFIES_SOMETHING,
 		.op_name = "OP_LAYOUTRETURN",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutreturn_rsize,
+		.op_rsize_bop = nfsd4_layoutreturn_rsize,
 	},
 #endif /* CONFIG_NFSD_PNFS */
 
 	/* NFSv4.2 operations */
 	[OP_ALLOCATE] = {
-		.op_func = (nfsd4op_func)nfsd4_allocate,
+		.op_func = nfsd4_allocate,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_ALLOCATE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_DEALLOCATE] = {
-		.op_func = (nfsd4op_func)nfsd4_deallocate,
+		.op_func = nfsd4_deallocate,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_DEALLOCATE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_CLONE] = {
-		.op_func = (nfsd4op_func)nfsd4_clone,
+		.op_func = nfsd4_clone,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_CLONE",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+		.op_rsize_bop = nfsd4_only_status_rsize,
 	},
 	[OP_COPY] = {
-		.op_func = (nfsd4op_func)nfsd4_copy,
+		.op_func = nfsd4_copy,
 		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
 		.op_name = "OP_COPY",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_copy_rsize,
+		.op_rsize_bop = nfsd4_copy_rsize,
 	},
 	[OP_SEEK] = {
-		.op_func = (nfsd4op_func)nfsd4_seek,
+		.op_func = nfsd4_seek,
 		.op_name = "OP_SEEK",
-		.op_rsize_bop = (nfsd4op_rsize)nfsd4_seek_rsize,
+		.op_rsize_bop = nfsd4_seek_rsize,
 	},
 };
 
@@ -2515,19 +2531,19 @@ static const char *nfsd4_op_name(unsigned opnum)
 #define nfsd4_voidres			nfsd4_voidargs
 struct nfsd4_voidargs { int dummy; };
 
-static struct svc_procedure		nfsd_procedures4[2] = {
+static const struct svc_procedure nfsd_procedures4[2] = {
 	[NFSPROC4_NULL] = {
-		.pc_func = (svc_procfunc) nfsd4_proc_null,
-		.pc_encode = (kxdrproc_t) nfs4svc_encode_voidres,
+		.pc_func = nfsd4_proc_null,
+		.pc_encode = nfs4svc_encode_voidres,
 		.pc_argsize = sizeof(struct nfsd4_voidargs),
 		.pc_ressize = sizeof(struct nfsd4_voidres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = 1,
 	},
 	[NFSPROC4_COMPOUND] = {
-		.pc_func = (svc_procfunc) nfsd4_proc_compound,
-		.pc_decode = (kxdrproc_t) nfs4svc_decode_compoundargs,
-		.pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres,
+		.pc_func = nfsd4_proc_compound,
+		.pc_decode = nfs4svc_decode_compoundargs,
+		.pc_encode = nfs4svc_encode_compoundres,
 		.pc_argsize = sizeof(struct nfsd4_compoundargs),
 		.pc_ressize = sizeof(struct nfsd4_compoundres),
 		.pc_release = nfsd4_release_compoundargs,
@@ -2536,10 +2552,12 @@ static struct svc_procedure		nfsd_procedures4[2] = {
 	},
 };
 
-struct svc_version	nfsd_version4 = {
+static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures4)];
+const struct svc_version nfsd_version4 = {
 	.vs_vers		= 4,
 	.vs_nproc		= 2,
 	.vs_proc		= nfsd_procedures4,
+	.vs_count		= nfsd_count3,
 	.vs_dispatch		= nfsd_dispatch,
 	.vs_xdrsize		= NFS4_SVC_XDRSIZE,
 	.vs_rpcb_optnl		= true,

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 22002fb..0c04f81 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c

@@ -2402,10 +2402,10 @@ static bool client_has_state(struct nfs4_client *clp)
 }
 
 __be32
-nfsd4_exchange_id(struct svc_rqst *rqstp,
-		  struct nfsd4_compound_state *cstate,
-		  struct nfsd4_exchange_id *exid)
+nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_exchange_id *exid = &u->exchange_id;
 	struct nfs4_client *conf, *new;
 	struct nfs4_client *unconf = NULL;
 	__be32 status;
@@ -2698,9 +2698,9 @@ static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs)
 
 __be32
 nfsd4_create_session(struct svc_rqst *rqstp,
-		     struct nfsd4_compound_state *cstate,
-		     struct nfsd4_create_session *cr_ses)
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_create_session *cr_ses = &u->create_session;
 	struct sockaddr *sa = svc_addr(rqstp);
 	struct nfs4_client *conf, *unconf;
 	struct nfs4_client *old = NULL;
@@ -2824,8 +2824,11 @@ static __be32 nfsd4_map_bcts_dir(u32 *dir)
 	return nfserr_inval;
 }
 
-__be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_backchannel_ctl *bc)
+__be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl;
 	struct nfsd4_session *session = cstate->session;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 	__be32 status;
@@ -2845,8 +2848,9 @@ __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state
 
 __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
 		     struct nfsd4_compound_state *cstate,
-		     struct nfsd4_bind_conn_to_session *bcts)
+		     union nfsd4_op_u *u)
 {
+	struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session;
 	__be32 status;
 	struct nfsd4_conn *conn;
 	struct nfsd4_session *session;
@@ -2886,10 +2890,10 @@ static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4
 }
 
 __be32
-nfsd4_destroy_session(struct svc_rqst *r,
-		      struct nfsd4_compound_state *cstate,
-		      struct nfsd4_destroy_session *sessionid)
+nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_destroy_session *sessionid = &u->destroy_session;
 	struct nfsd4_session *ses;
 	__be32 status;
 	int ref_held_by_me = 0;
@@ -2983,10 +2987,10 @@ static bool nfsd4_request_too_big(struct svc_rqst *rqstp,
 }
 
 __be32
-nfsd4_sequence(struct svc_rqst *rqstp,
-	       struct nfsd4_compound_state *cstate,
-	       struct nfsd4_sequence *seq)
+nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_sequence *seq = &u->sequence;
 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct xdr_stream *xdr = &resp->xdr;
 	struct nfsd4_session *session;
@@ -3120,8 +3124,11 @@ nfsd4_sequence_done(struct nfsd4_compoundres *resp)
 }
 
 __be32
-nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc)
+nfsd4_destroy_clientid(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_destroy_clientid *dc = &u->destroy_clientid;
 	struct nfs4_client *conf, *unconf;
 	struct nfs4_client *clp = NULL;
 	__be32 status = 0;
@@ -3161,8 +3168,10 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
 }
 
 __be32
-nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
+nfsd4_reclaim_complete(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_reclaim_complete *rc = &u->reclaim_complete;
 	__be32 status = 0;
 
 	if (rc->rca_one_fs) {
@@ -3199,8 +3208,9 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
 
 __be32
 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		  struct nfsd4_setclientid *setclid)
+		  union nfsd4_op_u *u)
 {
+	struct nfsd4_setclientid *setclid = &u->setclientid;
 	struct xdr_netobj 	clname = setclid->se_name;
 	nfs4_verifier		clverifier = setclid->se_verf;
 	struct nfs4_client	*conf, *new;
@@ -3257,9 +3267,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 __be32
 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
-			 struct nfsd4_compound_state *cstate,
-			 struct nfsd4_setclientid_confirm *setclientid_confirm)
+			struct nfsd4_compound_state *cstate,
+			union nfsd4_op_u *u)
 {
+	struct nfsd4_setclientid_confirm *setclientid_confirm =
+			&u->setclientid_confirm;
 	struct nfs4_client *conf, *unconf;
 	struct nfs4_client *old = NULL;
 	nfs4_verifier confirm = setclientid_confirm->sc_confirm; 
@@ -4506,8 +4518,9 @@ void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
 
 __be32
 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    clientid_t *clid)
+	    union nfsd4_op_u *u)
 {
+	clientid_t *clid = &u->renew;
 	struct nfs4_client *clp;
 	__be32 status;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
@@ -4993,8 +5006,9 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
  */
 __be32
 nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		   struct nfsd4_test_stateid *test_stateid)
+		   union nfsd4_op_u *u)
 {
+	struct nfsd4_test_stateid *test_stateid = &u->test_stateid;
 	struct nfsd4_test_stateid_id *stateid;
 	struct nfs4_client *cl = cstate->session->se_client;
 
@@ -5033,8 +5047,9 @@ nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)
 
 __be32
 nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		   struct nfsd4_free_stateid *free_stateid)
+		   union nfsd4_op_u *u)
 {
+	struct nfsd4_free_stateid *free_stateid = &u->free_stateid;
 	stateid_t *stateid = &free_stateid->fr_stateid;
 	struct nfs4_stid *s;
 	struct nfs4_delegation *dp;
@@ -5162,8 +5177,9 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
 
 __be32
 nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		   struct nfsd4_open_confirm *oc)
+		   union nfsd4_op_u *u)
 {
+	struct nfsd4_open_confirm *oc = &u->open_confirm;
 	__be32 status;
 	struct nfs4_openowner *oo;
 	struct nfs4_ol_stateid *stp;
@@ -5230,9 +5246,9 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac
 
 __be32
 nfsd4_open_downgrade(struct svc_rqst *rqstp,
-		     struct nfsd4_compound_state *cstate,
-		     struct nfsd4_open_downgrade *od)
+		     struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
 {
+	struct nfsd4_open_downgrade *od = &u->open_downgrade;
 	__be32 status;
 	struct nfs4_ol_stateid *stp;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
@@ -5300,8 +5316,9 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
  */
 __be32
 nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_close *close)
+		union nfsd4_op_u *u)
 {
+	struct nfsd4_close *close = &u->close;
 	__be32 status;
 	struct nfs4_ol_stateid *stp;
 	struct net *net = SVC_NET(rqstp);
@@ -5330,8 +5347,9 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 __be32
 nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-		  struct nfsd4_delegreturn *dr)
+		  union nfsd4_op_u *u)
 {
+	struct nfsd4_delegreturn *dr = &u->delegreturn;
 	struct nfs4_delegation *dp;
 	stateid_t *stateid = &dr->dr_stateid;
 	struct nfs4_stid *s;
@@ -5706,8 +5724,9 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
  */
 __be32
 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	   struct nfsd4_lock *lock)
+	   union nfsd4_op_u *u)
 {
+	struct nfsd4_lock *lock = &u->lock;
 	struct nfs4_openowner *open_sop = NULL;
 	struct nfs4_lockowner *lock_sop = NULL;
 	struct nfs4_ol_stateid *lock_stp = NULL;
@@ -5939,8 +5958,9 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct
  */
 __be32
 nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_lockt *lockt)
+	    union nfsd4_op_u *u)
 {
+	struct nfsd4_lockt *lockt = &u->lockt;
 	struct file_lock *file_lock = NULL;
 	struct nfs4_lockowner *lo = NULL;
 	__be32 status;
@@ -6012,8 +6032,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 __be32
 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
-	    struct nfsd4_locku *locku)
+	    union nfsd4_op_u *u)
 {
+	struct nfsd4_locku *locku = &u->locku;
 	struct nfs4_ol_stateid *stp;
 	struct file *filp = NULL;
 	struct file_lock *file_lock = NULL;
@@ -6119,8 +6140,9 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
 __be32
 nfsd4_release_lockowner(struct svc_rqst *rqstp,
 			struct nfsd4_compound_state *cstate,
-			struct nfsd4_release_lockowner *rlockowner)
+			union nfsd4_op_u *u)
 {
+	struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
 	clientid_t *clid = &rlockowner->rl_clientid;
 	struct nfs4_stateowner *sop;
 	struct nfs4_lockowner *lo = NULL;
@@ -7103,27 +7125,31 @@ clear_current_stateid(struct nfsd4_compound_state *cstate)
  * functions to set current state id
  */
 void
-nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp)
+nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	put_stateid(cstate, &odp->od_stateid);
+	put_stateid(cstate, &u->open_downgrade.od_stateid);
 }
 
 void
-nfsd4_set_openstateid(struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
+nfsd4_set_openstateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	put_stateid(cstate, &open->op_stateid);
+	put_stateid(cstate, &u->open.op_stateid);
 }
 
 void
-nfsd4_set_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close)
+nfsd4_set_closestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	put_stateid(cstate, &close->cl_stateid);
+	put_stateid(cstate, &u->close.cl_stateid);
 }
 
 void
-nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate, struct nfsd4_lock *lock)
+nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	put_stateid(cstate, &lock->lk_resp_stateid);
+	put_stateid(cstate, &u->lock.lk_resp_stateid);
 }
 
 /*
@@ -7131,49 +7157,57 @@ nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate, struct nfsd4_lock *lo
  */
 
 void
-nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp)
+nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &odp->od_stateid);
+	get_stateid(cstate, &u->open_downgrade.od_stateid);
 }
 
 void
-nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate, struct nfsd4_delegreturn *drp)
+nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &drp->dr_stateid);
+	get_stateid(cstate, &u->delegreturn.dr_stateid);
 }
 
 void
-nfsd4_get_freestateid(struct nfsd4_compound_state *cstate, struct nfsd4_free_stateid *fsp)
+nfsd4_get_freestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &fsp->fr_stateid);
+	get_stateid(cstate, &u->free_stateid.fr_stateid);
 }
 
 void
-nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate, struct nfsd4_setattr *setattr)
+nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &setattr->sa_stateid);
+	get_stateid(cstate, &u->setattr.sa_stateid);
 }
 
 void
-nfsd4_get_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close)
+nfsd4_get_closestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &close->cl_stateid);
+	get_stateid(cstate, &u->close.cl_stateid);
 }
 
 void
-nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku)
+nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &locku->lu_stateid);
+	get_stateid(cstate, &u->locku.lu_stateid);
 }
 
 void
-nfsd4_get_readstateid(struct nfsd4_compound_state *cstate, struct nfsd4_read *read)
+nfsd4_get_readstateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &read->rd_stateid);
+	get_stateid(cstate, &u->read.rd_stateid);
 }
 
 void
-nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, struct nfsd4_write *write)
+nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
+		union nfsd4_op_u *u)
 {
-	get_stateid(cstate, &write->wr_stateid);
+	get_stateid(cstate, &u->write.wr_stateid);
 }

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 26780d5..20fbcab 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c

@@ -1973,7 +1973,7 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
 		*p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time));
 		*p++ = 0;
 	} else if (IS_I_VERSION(inode)) {
-		p = xdr_encode_hyper(p, inode->i_version);
+		p = xdr_encode_hyper(p, nfsd4_change_attribute(inode));
 	} else {
 		*p++ = cpu_to_be32(stat->ctime.tv_sec);
 		*p++ = cpu_to_be32(stat->ctime.tv_nsec);
@@ -4538,14 +4538,13 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
 }
 
 int
-nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
 {
         return xdr_ressize_check(rqstp, p);
 }
 
-int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
+void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
 {
-	struct svc_rqst *rqstp = rq;
 	struct nfsd4_compoundargs *args = rqstp->rq_argp;
 
 	if (args->ops != args->iops) {
@@ -4559,12 +4558,13 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
 		args->to_free = tb->next;
 		kfree(tb);
 	}
-	return 1;
 }
 
 int
-nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args)
+nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd4_compoundargs *args = rqstp->rq_argp;
+
 	if (rqstp->rq_arg.head[0].iov_len % 4) {
 		/* client is nuts */
 		dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)",
@@ -4584,11 +4584,12 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_comp
 }
 
 int
-nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundres *resp)
+nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p)
 {
 	/*
 	 * All that remains is to write the tag and operation count...
 	 */
+	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	struct xdr_buf *buf = resp->xdr.buf;
 
 	WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +

diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index d966068..b9c538a 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h

@@ -60,7 +60,7 @@ struct readdir_cd {
 
 
 extern struct svc_program	nfsd_program;
-extern struct svc_version	nfsd_version2, nfsd_version3,
+extern const struct svc_version	nfsd_version2, nfsd_version3,
 				nfsd_version4;
 extern struct mutex		nfsd_mutex;
 extern spinlock_t		nfsd_drc_lock;
@@ -86,12 +86,12 @@ void		nfsd_destroy(struct net *net);
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 #ifdef CONFIG_NFSD_V2_ACL
-extern struct svc_version nfsd_acl_version2;
+extern const struct svc_version nfsd_acl_version2;
 #else
 #define nfsd_acl_version2 NULL
 #endif
 #ifdef CONFIG_NFSD_V3_ACL
-extern struct svc_version nfsd_acl_version3;
+extern const struct svc_version nfsd_acl_version3;
 #else
 #define nfsd_acl_version3 NULL
 #endif

diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index f84fe6b..e47cf6c 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h

@@ -241,6 +241,28 @@ fh_clear_wcc(struct svc_fh *fhp)
 }
 
 /*
+ * We could use i_version alone as the change attribute.  However,
+ * i_version can go backwards after a reboot.  On its own that doesn't
+ * necessarily cause a problem, but if i_version goes backwards and then
+ * is incremented again it could reuse a value that was previously used
+ * before boot, and a client who queried the two values might
+ * incorrectly assume nothing changed.
+ *
+ * By using both ctime and the i_version counter we guarantee that as
+ * long as time doesn't go backwards we never reuse an old value.
+ */
+static inline u64 nfsd4_change_attribute(struct inode *inode)
+{
+	u64 chattr;
+
+	chattr =  inode->i_ctime.tv_sec;
+	chattr <<= 30;
+	chattr += inode->i_ctime.tv_nsec;
+	chattr += inode->i_version;
+	return chattr;
+}
+
+/*
  * Fill in the pre_op attr for the wcc data
  */
 static inline void
@@ -253,7 +275,7 @@ fill_pre_wcc(struct svc_fh *fhp)
 		fhp->fh_pre_mtime = inode->i_mtime;
 		fhp->fh_pre_ctime = inode->i_ctime;
 		fhp->fh_pre_size  = inode->i_size;
-		fhp->fh_pre_change = inode->i_version;
+		fhp->fh_pre_change = nfsd4_change_attribute(inode);
 		fhp->fh_pre_saved = true;
 	}
 }

diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 03a7e9d..5076ae2 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c

@@ -17,7 +17,7 @@ typedef struct svc_buf	svc_buf;
 
 
 static __be32
-nfsd_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+nfsd_proc_null(struct svc_rqst *rqstp)
 {
 	return nfs_ok;
 }
@@ -39,9 +39,10 @@ nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp)
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
-					  struct nfsd_attrstat *resp)
+nfsd_proc_getattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	__be32 nfserr;
 	dprintk("nfsd: GETATTR  %s\n", SVCFH_fmt(&argp->fh));
 
@@ -56,9 +57,10 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
-					  struct nfsd_attrstat  *resp)
+nfsd_proc_setattr(struct svc_rqst *rqstp)
 {
+	struct nfsd_sattrargs *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	struct iattr *iap = &argp->attrs;
 	struct svc_fh *fhp;
 	__be32 nfserr;
@@ -122,9 +124,10 @@ nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
-					 struct nfsd_diropres  *resp)
+nfsd_proc_lookup(struct svc_rqst *rqstp)
 {
+	struct nfsd_diropargs *argp = rqstp->rq_argp;
+	struct nfsd_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LOOKUP   %s %.*s\n",
@@ -142,9 +145,10 @@ nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
  * Read a symlink.
  */
 static __be32
-nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp,
-					   struct nfsd_readlinkres *resp)
+nfsd_proc_readlink(struct svc_rqst *rqstp)
 {
+	struct nfsd_readlinkargs *argp = rqstp->rq_argp;
+	struct nfsd_readlinkres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh));
@@ -162,9 +166,10 @@ nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
-				       struct nfsd_readres  *resp)
+nfsd_proc_read(struct svc_rqst *rqstp)
 {
+	struct nfsd_readargs *argp = rqstp->rq_argp;
+	struct nfsd_readres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: READ    %s %d bytes at %d\n",
@@ -200,9 +205,10 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
-					struct nfsd_attrstat  *resp)
+nfsd_proc_write(struct svc_rqst *rqstp)
 {
+	struct nfsd_writeargs *argp = rqstp->rq_argp;
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
 	__be32	nfserr;
 	unsigned long cnt = argp->len;
 
@@ -222,9 +228,10 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
  * N.B. After this call _both_ argp->fh and resp->fh need an fh_put
  */
 static __be32
-nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
-					 struct nfsd_diropres   *resp)
+nfsd_proc_create(struct svc_rqst *rqstp)
 {
+	struct nfsd_createargs *argp = rqstp->rq_argp;
+	struct nfsd_diropres *resp = rqstp->rq_resp;
 	svc_fh		*dirfhp = &argp->fh;
 	svc_fh		*newfhp = &resp->fh;
 	struct iattr	*attr = &argp->attrs;
@@ -377,9 +384,9 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
 }
 
 static __be32
-nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
-					 void		       *resp)
+nfsd_proc_remove(struct svc_rqst *rqstp)
 {
+	struct nfsd_diropargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: REMOVE   %s %.*s\n", SVCFH_fmt(&argp->fh),
@@ -392,9 +399,9 @@ nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
 }
 
 static __be32
-nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp,
-				  	 void		        *resp)
+nfsd_proc_rename(struct svc_rqst *rqstp)
 {
+	struct nfsd_renameargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RENAME   %s %.*s -> \n",
@@ -410,9 +417,9 @@ nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp,
 }
 
 static __be32
-nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp,
-				void			    *resp)
+nfsd_proc_link(struct svc_rqst *rqstp)
 {
+	struct nfsd_linkargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: LINK     %s ->\n",
@@ -430,9 +437,9 @@ nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp,
 }
 
 static __be32
-nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
-				          void			  *resp)
+nfsd_proc_symlink(struct svc_rqst *rqstp)
 {
+	struct nfsd_symlinkargs *argp = rqstp->rq_argp;
 	struct svc_fh	newfh;
 	__be32		nfserr;
 
@@ -460,9 +467,10 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
  * N.B. After this call resp->fh needs an fh_put
  */
 static __be32
-nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
-					struct nfsd_diropres   *resp)
+nfsd_proc_mkdir(struct svc_rqst *rqstp)
 {
+	struct nfsd_createargs *argp = rqstp->rq_argp;
+	struct nfsd_diropres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: MKDIR    %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
@@ -484,9 +492,9 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
  * Remove a directory
  */
 static __be32
-nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
-				 	void		      *resp)
+nfsd_proc_rmdir(struct svc_rqst *rqstp)
 {
+	struct nfsd_diropargs *argp = rqstp->rq_argp;
 	__be32	nfserr;
 
 	dprintk("nfsd: RMDIR    %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
@@ -500,9 +508,10 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
  * Read a portion of a directory.
  */
 static __be32
-nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
-					  struct nfsd_readdirres  *resp)
+nfsd_proc_readdir(struct svc_rqst *rqstp)
 {
+	struct nfsd_readdirargs *argp = rqstp->rq_argp;
+	struct nfsd_readdirres *resp = rqstp->rq_resp;
 	int		count;
 	__be32		nfserr;
 	loff_t		offset;
@@ -540,9 +549,10 @@ nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
  * Get file system info
  */
 static __be32
-nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle   *argp,
-					  struct nfsd_statfsres *resp)
+nfsd_proc_statfs(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *argp = rqstp->rq_argp;
+	struct nfsd_statfsres *resp = rqstp->rq_resp;
 	__be32	nfserr;
 
 	dprintk("nfsd: STATFS   %s\n", SVCFH_fmt(&argp->fh));
@@ -563,168 +573,168 @@ struct nfsd_void { int dummy; };
 #define FH 8		/* filehandle */
 #define	AT 18		/* attributes */
 
-static struct svc_procedure		nfsd_procedures2[18] = {
+static const struct svc_procedure nfsd_procedures2[18] = {
 	[NFSPROC_NULL] = {
-		.pc_func = (svc_procfunc) nfsd_proc_null,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_func = nfsd_proc_null,
+		.pc_decode = nfssvc_decode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_GETATTR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_getattr,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_func = nfsd_proc_getattr,
+		.pc_decode = nfssvc_decode_fhandle,
+		.pc_encode = nfssvc_encode_attrstat,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+AT,
 	},
 	[NFSPROC_SETATTR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_setattr,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_func = nfsd_proc_setattr,
+		.pc_decode = nfssvc_decode_sattrargs,
+		.pc_encode = nfssvc_encode_attrstat,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_sattrargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+AT,
 	},
 	[NFSPROC_ROOT] = {
-		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_decode = nfssvc_decode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_LOOKUP] = {
-		.pc_func = (svc_procfunc) nfsd_proc_lookup,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_func = nfsd_proc_lookup,
+		.pc_decode = nfssvc_decode_diropargs,
+		.pc_encode = nfssvc_encode_diropres,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+FH+AT,
 	},
 	[NFSPROC_READLINK] = {
-		.pc_func = (svc_procfunc) nfsd_proc_readlink,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_readlinkargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres,
+		.pc_func = nfsd_proc_readlink,
+		.pc_decode = nfssvc_decode_readlinkargs,
+		.pc_encode = nfssvc_encode_readlinkres,
 		.pc_argsize = sizeof(struct nfsd_readlinkargs),
 		.pc_ressize = sizeof(struct nfsd_readlinkres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+1+NFS_MAXPATHLEN/4,
 	},
 	[NFSPROC_READ] = {
-		.pc_func = (svc_procfunc) nfsd_proc_read,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_readargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_readres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_func = nfsd_proc_read,
+		.pc_decode = nfssvc_decode_readargs,
+		.pc_encode = nfssvc_encode_readres,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_readargs),
 		.pc_ressize = sizeof(struct nfsd_readres),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4,
 	},
 	[NFSPROC_WRITECACHE] = {
-		.pc_decode = (kxdrproc_t) nfssvc_decode_void,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_decode = nfssvc_decode_void,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_void),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_NOCACHE,
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_WRITE] = {
-		.pc_func = (svc_procfunc) nfsd_proc_write,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_writeargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_attrstat,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_func = nfsd_proc_write,
+		.pc_decode = nfssvc_decode_writeargs,
+		.pc_encode = nfssvc_encode_attrstat,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_writeargs),
 		.pc_ressize = sizeof(struct nfsd_attrstat),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+AT,
 	},
 	[NFSPROC_CREATE] = {
-		.pc_func = (svc_procfunc) nfsd_proc_create,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_func = nfsd_proc_create,
+		.pc_decode = nfssvc_decode_createargs,
+		.pc_encode = nfssvc_encode_diropres,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+FH+AT,
 	},
 	[NFSPROC_REMOVE] = {
-		.pc_func = (svc_procfunc) nfsd_proc_remove,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_func = nfsd_proc_remove,
+		.pc_decode = nfssvc_decode_diropargs,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_RENAME] = {
-		.pc_func = (svc_procfunc) nfsd_proc_rename,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_renameargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_func = nfsd_proc_rename,
+		.pc_decode = nfssvc_decode_renameargs,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_renameargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_LINK] = {
-		.pc_func = (svc_procfunc) nfsd_proc_link,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_linkargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_func = nfsd_proc_link,
+		.pc_decode = nfssvc_decode_linkargs,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_linkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_SYMLINK] = {
-		.pc_func = (svc_procfunc) nfsd_proc_symlink,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_symlinkargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_func = nfsd_proc_symlink,
+		.pc_decode = nfssvc_decode_symlinkargs,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_symlinkargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_MKDIR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_mkdir,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_createargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_diropres,
-		.pc_release = (kxdrproc_t) nfssvc_release_fhandle,
+		.pc_func = nfsd_proc_mkdir,
+		.pc_decode = nfssvc_decode_createargs,
+		.pc_encode = nfssvc_encode_diropres,
+		.pc_release = nfssvc_release_fhandle,
 		.pc_argsize = sizeof(struct nfsd_createargs),
 		.pc_ressize = sizeof(struct nfsd_diropres),
 		.pc_cachetype = RC_REPLBUFF,
 		.pc_xdrressize = ST+FH+AT,
 	},
 	[NFSPROC_RMDIR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_rmdir,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_diropargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_void,
+		.pc_func = nfsd_proc_rmdir,
+		.pc_decode = nfssvc_decode_diropargs,
+		.pc_encode = nfssvc_encode_void,
 		.pc_argsize = sizeof(struct nfsd_diropargs),
 		.pc_ressize = sizeof(struct nfsd_void),
 		.pc_cachetype = RC_REPLSTAT,
 		.pc_xdrressize = ST,
 	},
 	[NFSPROC_READDIR] = {
-		.pc_func = (svc_procfunc) nfsd_proc_readdir,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_readdirargs,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_readdirres,
+		.pc_func = nfsd_proc_readdir,
+		.pc_decode = nfssvc_decode_readdirargs,
+		.pc_encode = nfssvc_encode_readdirres,
 		.pc_argsize = sizeof(struct nfsd_readdirargs),
 		.pc_ressize = sizeof(struct nfsd_readdirres),
 		.pc_cachetype = RC_NOCACHE,
 	},
 	[NFSPROC_STATFS] = {
-		.pc_func = (svc_procfunc) nfsd_proc_statfs,
-		.pc_decode = (kxdrproc_t) nfssvc_decode_fhandle,
-		.pc_encode = (kxdrproc_t) nfssvc_encode_statfsres,
+		.pc_func = nfsd_proc_statfs,
+		.pc_decode = nfssvc_decode_fhandle,
+		.pc_encode = nfssvc_encode_statfsres,
 		.pc_argsize = sizeof(struct nfsd_fhandle),
 		.pc_ressize = sizeof(struct nfsd_statfsres),
 		.pc_cachetype = RC_NOCACHE,
@@ -733,12 +743,14 @@ static struct svc_procedure		nfsd_procedures2[18] = {
 };
 
 
-struct svc_version	nfsd_version2 = {
-		.vs_vers	= 2,
-		.vs_nproc	= 18,
-		.vs_proc	= nfsd_procedures2,
-		.vs_dispatch	= nfsd_dispatch,
-		.vs_xdrsize	= NFS2_SVC_XDRSIZE,
+static unsigned int nfsd_count2[ARRAY_SIZE(nfsd_procedures2)];
+const struct svc_version nfsd_version2 = {
+	.vs_vers	= 2,
+	.vs_nproc	= 18,
+	.vs_proc	= nfsd_procedures2,
+	.vs_count	= nfsd_count2,
+	.vs_dispatch	= nfsd_dispatch,
+	.vs_xdrsize	= NFS2_SVC_XDRSIZE,
 };
 
 /*

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 59979f0..063ae7d 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c

@@ -68,14 +68,14 @@ unsigned long	nfsd_drc_mem_used;
 
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 static struct svc_stat	nfsd_acl_svcstats;
-static struct svc_version *	nfsd_acl_version[] = {
+static const struct svc_version *nfsd_acl_version[] = {
 	[2] = &nfsd_acl_version2,
 	[3] = &nfsd_acl_version3,
 };
 
 #define NFSD_ACL_MINVERS            2
 #define NFSD_ACL_NRVERS		ARRAY_SIZE(nfsd_acl_version)
-static struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];
+static const struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];
 
 static struct svc_program	nfsd_acl_program = {
 	.pg_prog		= NFS_ACL_PROGRAM,
@@ -92,7 +92,7 @@ static struct svc_stat	nfsd_acl_svcstats = {
 };
 #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
 
-static struct svc_version *	nfsd_version[] = {
+static const struct svc_version *nfsd_version[] = {
 	[2] = &nfsd_version2,
 #if defined(CONFIG_NFSD_V3)
 	[3] = &nfsd_version3,
@@ -104,7 +104,7 @@ static struct svc_version *	nfsd_version[] = {
 
 #define NFSD_MINVERS    	2
 #define NFSD_NRVERS		ARRAY_SIZE(nfsd_version)
-static struct svc_version *nfsd_versions[NFSD_NRVERS];
+static const struct svc_version *nfsd_versions[NFSD_NRVERS];
 
 struct svc_program		nfsd_program = {
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
@@ -756,7 +756,7 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr)
  * problem, we enforce these assumptions here:
  */
 static bool nfs_request_too_big(struct svc_rqst *rqstp,
-				struct svc_procedure *proc)
+				const struct svc_procedure *proc)
 {
 	/*
 	 * The ACL code has more careful bounds-checking and is not
@@ -781,8 +781,7 @@ static bool nfs_request_too_big(struct svc_rqst *rqstp,
 int
 nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
-	struct svc_procedure	*proc;
-	kxdrproc_t		xdr;
+	const struct svc_procedure *proc;
 	__be32			nfserr;
 	__be32			*nfserrp;
 
@@ -801,9 +800,8 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	 */
 	rqstp->rq_cachetype = proc->pc_cachetype;
 	/* Decode arguments */
-	xdr = proc->pc_decode;
-	if (xdr && !xdr(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base,
-			rqstp->rq_argp)) {
+	if (proc->pc_decode &&
+	    !proc->pc_decode(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base)) {
 		dprintk("nfsd: failed to decode arguments!\n");
 		*statp = rpc_garbage_args;
 		return 1;
@@ -827,7 +825,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	rqstp->rq_res.head[0].iov_len += sizeof(__be32);
 
 	/* Now call the procedure handler, and encode NFS status. */
-	nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
+	nfserr = proc->pc_func(rqstp);
 	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
 	if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags)) {
 		dprintk("nfsd: Dropping request; may be revisited later\n");
@@ -842,9 +840,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	 * For NFSv2, additional info is never returned in case of an error.
 	 */
 	if (!(nfserr && rqstp->rq_vers == 2)) {
-		xdr = proc->pc_encode;
-		if (xdr && !xdr(rqstp, nfserrp,
-				rqstp->rq_resp)) {
+		if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp)) {
 			/* Failed to encode result. Release cache entry */
 			dprintk("nfsd: failed to encode result!\n");
 			nfsd_cache_update(rqstp, RC_NOCACHE, NULL);

diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index de07ff6..e4da271 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c

@@ -206,14 +206,16 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f
  * XDR decode functions
  */
 int
-nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_argsize_check(rqstp, p);
 }
 
 int
-nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
+nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_fhandle *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -221,9 +223,10 @@ nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *ar
 }
 
 int
-nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_sattrargs *args)
+nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_sattrargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -233,9 +236,10 @@ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_diropargs *args)
+nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_diropargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->fh))
 	 || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -244,9 +248,9 @@ nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readargs *args)
+nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readargs *args = rqstp->rq_argp;
 	unsigned int len;
 	int v;
 	p = decode_fh(p, &args->fh);
@@ -276,9 +280,9 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_writeargs *args)
+nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_writeargs *args = rqstp->rq_argp;
 	unsigned int len, hdr, dlen;
 	struct kvec *head = rqstp->rq_arg.head;
 	int v;
@@ -332,9 +336,10 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_createargs *args)
+nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_createargs *args = rqstp->rq_argp;
+
 	if (   !(p = decode_fh(p, &args->fh))
 	    || !(p = decode_filename(p, &args->name, &args->len)))
 		return 0;
@@ -344,9 +349,10 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_renameargs *args)
+nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_renameargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_filename(p, &args->fname, &args->flen))
 	 || !(p = decode_fh(p, &args->tfh))
@@ -357,8 +363,10 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args)
+nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readlinkargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -368,9 +376,10 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli
 }
 
 int
-nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_linkargs *args)
+nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_linkargs *args = rqstp->rq_argp;
+
 	if (!(p = decode_fh(p, &args->ffh))
 	 || !(p = decode_fh(p, &args->tfh))
 	 || !(p = decode_filename(p, &args->tname, &args->tlen)))
@@ -380,9 +389,10 @@ nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_symlinkargs *args)
+nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_symlinkargs *args = rqstp->rq_argp;
+
 	if (   !(p = decode_fh(p, &args->ffh))
 	    || !(p = decode_filename(p, &args->fname, &args->flen))
 	    || !(p = decode_pathname(p, &args->tname, &args->tlen)))
@@ -393,9 +403,10 @@ nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readdirargs *args)
+nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readdirargs *args = rqstp->rq_argp;
+
 	p = decode_fh(p, &args->fh);
 	if (!p)
 		return 0;
@@ -411,32 +422,35 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
  * XDR encode functions
  */
 int
-nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
 {
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_attrstat *resp)
+nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_attrstat *resp = rqstp->rq_resp;
+
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_diropres *resp)
+nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_diropres *resp = rqstp->rq_resp;
+
 	p = encode_fh(p, &resp->fh);
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
 int
-nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readlinkres *resp)
+nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readlinkres *resp = rqstp->rq_resp;
+
 	*p++ = htonl(resp->len);
 	xdr_ressize_check(rqstp, p);
 	rqstp->rq_res.page_len = resp->len;
@@ -450,9 +464,10 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readres *resp)
+nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readres *resp = rqstp->rq_resp;
+
 	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	*p++ = htonl(resp->count);
 	xdr_ressize_check(rqstp, p);
@@ -469,9 +484,10 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_readdirres *resp)
+nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_readdirres *resp = rqstp->rq_resp;
+
 	xdr_ressize_check(rqstp, p);
 	p = resp->buffer;
 	*p++ = 0;			/* no more entries */
@@ -482,9 +498,9 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
 }
 
 int
-nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_statfsres *resp)
+nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p)
 {
+	struct nfsd_statfsres *resp = rqstp->rq_resp;
 	struct kstatfs	*stat = &resp->stats;
 
 	*p++ = htonl(NFSSVC_MAXBLKSIZE_V2);	/* max transfer size */
@@ -543,10 +559,10 @@ nfssvc_encode_entry(void *ccdv, const char *name,
 /*
  * XDR release functions
  */
-int
-nfssvc_release_fhandle(struct svc_rqst *rqstp, __be32 *p,
-					struct nfsd_fhandle *resp)
+void
+nfssvc_release_fhandle(struct svc_rqst *rqstp)
 {
+	struct nfsd_fhandle *resp = rqstp->rq_resp;
+
 	fh_put(&resp->fh);
-	return 1;
 }

diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 4f0481d6..457ce45 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h

@@ -131,40 +131,30 @@ union nfsd_xdrstore {
 #define NFS2_SVC_XDRSIZE	sizeof(union nfsd_xdrstore)
 
 
-int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *);
-int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
-int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *,
-				struct nfsd_sattrargs *);
-int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *,
-				struct nfsd_diropargs *);
-int nfssvc_decode_readargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readargs *);
-int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *,
-				struct nfsd_writeargs *);
-int nfssvc_decode_createargs(struct svc_rqst *, __be32 *,
-				struct nfsd_createargs *);
-int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *,
-				struct nfsd_renameargs *);
-int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readlinkargs *);
-int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_linkargs *);
-int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd_symlinkargs *);
-int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd_readdirargs *);
-int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
-int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
-int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
-int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *);
-int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *);
-int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *);
-int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *);
+int nfssvc_decode_void(struct svc_rqst *, __be32 *);
+int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *);
+int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_createargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *);
+int nfssvc_encode_void(struct svc_rqst *, __be32 *);
+int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *);
+int nfssvc_encode_diropres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *);
 
 int nfssvc_encode_entry(void *, const char *name,
 			int namlen, loff_t offset, u64 ino, unsigned int);
 
-int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
+void nfssvc_release_fhandle(struct svc_rqst *);
 
 /* Helper functions for NFSv2 ACL code */
 __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat);

diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 335e04a..80d7da6 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h

@@ -269,71 +269,41 @@ union nfsd3_xdrstore {
 
 #define NFS3_SVC_XDRSIZE		sizeof(union nfsd3_xdrstore)
 
-int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
-int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_sattrargs *);
-int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropargs *);
-int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_accessargs *);
-int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readargs *);
-int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_writeargs *);
-int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_createargs *);
-int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_createargs *);
-int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_mknodargs *);
-int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_renameargs *);
-int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readlinkargs *);
-int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_linkargs *);
-int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_symlinkargs *);
-int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirargs *);
-int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirargs *);
-int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *,
-				struct nfsd3_commitargs *);
-int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
-int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropres *);
-int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *,
-				struct nfsd3_accessres *);
-int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *,
-				struct nfsd3_readlinkres *);
-int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *);
-int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *);
-int nfs3svc_encode_createres(struct svc_rqst *, __be32 *,
-				struct nfsd3_diropres *);
-int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *,
-				struct nfsd3_renameres *);
-int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *,
-				struct nfsd3_linkres *);
-int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *,
-				struct nfsd3_readdirres *);
-int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *,
-				struct nfsd3_fsstatres *);
-int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *,
-				struct nfsd3_fsinfores *);
-int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
-				struct nfsd3_pathconfres *);
-int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
-				struct nfsd3_commitres *);
+int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_createres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *);
 
-int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *,
-				struct nfsd3_attrstat *);
-int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *,
-				struct nfsd3_fhandle_pair *);
+void nfs3svc_release_fhandle(struct svc_rqst *);
+void nfs3svc_release_fhandle2(struct svc_rqst *);
 int nfs3svc_encode_entry(void *, const char *name,
 				int namlen, loff_t offset, u64 ino,
 				unsigned int);

diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 8fda4ab..72c6ad1 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h

@@ -539,7 +539,7 @@ struct nfsd4_seek {
 struct nfsd4_op {
 	int					opnum;
 	__be32					status;
-	union {
+	union nfsd4_op_u {
 		struct nfsd4_access		access;
 		struct nfsd4_close		close;
 		struct nfsd4_commit		commit;
@@ -577,6 +577,7 @@ struct nfsd4_op {
 		struct nfsd4_bind_conn_to_session bind_conn_to_session;
 		struct nfsd4_create_session	create_session;
 		struct nfsd4_destroy_session	destroy_session;
+		struct nfsd4_destroy_clientid	destroy_clientid;
 		struct nfsd4_sequence		sequence;
 		struct nfsd4_reclaim_complete	reclaim_complete;
 		struct nfsd4_test_stateid	test_stateid;
@@ -585,6 +586,7 @@ struct nfsd4_op {
 		struct nfsd4_layoutget		layoutget;
 		struct nfsd4_layoutcommit	layoutcommit;
 		struct nfsd4_layoutreturn	layoutreturn;
+		struct nfsd4_secinfo_no_name	secinfo_no_name;
 
 		/* NFSv4.2 */
 		struct nfsd4_fallocate		allocate;
@@ -682,11 +684,9 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 
 
 bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
-int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
-int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
-		struct nfsd4_compoundargs *);
-int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
-		struct nfsd4_compoundres *);
+int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *);
+int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *);
+int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *);
 __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
 void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
 void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op);
@@ -695,27 +695,26 @@ __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
 		struct dentry *dentry,
 		u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
 extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_setclientid *setclid);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_setclientid_confirm *setclientid_confirm);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
-extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *);
-extern __be32 nfsd4_bind_conn_to_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_bind_conn_to_session *);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
+extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *,
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
+extern __be32 nfsd4_bind_conn_to_session(struct svc_rqst *,
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_create_session(struct svc_rqst *,
-		struct nfsd4_compound_state *,
-		struct nfsd4_create_session *);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_sequence(struct svc_rqst *,
-		struct nfsd4_compound_state *,
-		struct nfsd4_sequence *);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern void nfsd4_sequence_done(struct nfsd4_compoundres *resp);
 extern __be32 nfsd4_destroy_session(struct svc_rqst *,
-		struct nfsd4_compound_state *,
-		struct nfsd4_destroy_session *);
-extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_clientid *);
-__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
+extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
+__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
 extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
 		struct nfsd4_open *open, struct nfsd_net *nn);
 extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
@@ -724,34 +723,29 @@ extern void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate);
 extern void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
 		struct nfsd4_open *open);
 extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
-extern __be32 nfsd4_close(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_close *close);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
+extern __be32 nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
 extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_open_downgrade *od);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
-		struct nfsd4_lock *lock);
-extern __be32 nfsd4_lockt(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_lockt *lockt);
-extern __be32 nfsd4_locku(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_locku *locku);
+		union nfsd4_op_u *u);
+extern __be32 nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
+extern __be32 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
 extern __be32
 nfsd4_release_lockowner(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *,
-		struct nfsd4_release_lockowner *rlockowner);
-extern int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
+extern void nfsd4_release_compoundargs(struct svc_rqst *rqstp);
 extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
-extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
-			  struct nfsd4_compound_state *, clientid_t *clid);
+		struct nfsd4_compound_state *, union nfsd4_op_u *u);
+extern __be32 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
+		union nfsd4_op_u *u);
 extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_test_stateid *test_stateid);
+		struct nfsd4_compound_state *, union nfsd4_op_u *);
 extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp,
-		struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid);
+		struct nfsd4_compound_state *, union nfsd4_op_u *);
 extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr);
 
 #endif

diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index c0c9683..cbfc196 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig

@@ -23,3 +23,23 @@
 	  Note, that redirects are not backward compatible.  That is, mounting
 	  an overlay which has redirects on a kernel that doesn't support this
 	  feature will have unexpected results.
+
+config OVERLAY_FS_INDEX
+	bool "Overlayfs: turn on inodes index feature by default"
+	depends on OVERLAY_FS
+	help
+	  If this config option is enabled then overlay filesystems will use
+	  the inodes index dir to map lower inodes to upper inodes by default.
+	  In this case it is still possible to turn off index globally with the
+	  "index=off" module option or on a filesystem instance basis with the
+	  "index=off" mount option.
+
+	  The inodes index feature prevents breaking of lower hardlinks on copy
+	  up.
+
+	  Note that the inodes index feature is read-only backward compatible.
+	  That is, mounting an overlay which has an index dir read-only on a
+	  kernel that doesn't support this feature will not have any negative
+	  outcomes.  However, mounting the same overlay read-write with an old
+	  kernel and then mounting it again with a new kernel will have
+	  unexpected results.

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index e5869f9..acb6f97 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c

@@ -233,12 +233,13 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
 	return err;
 }
 
-static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
+struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper)
 {
 	struct ovl_fh *fh;
 	int fh_type, fh_len, dwords;
 	void *buf;
 	int buflen = MAX_HANDLE_SZ;
+	uuid_t *uuid = &lower->d_sb->s_uuid;
 
 	buf = kmalloc(buflen, GFP_TEMPORARY);
 	if (!buf)
@@ -271,6 +272,14 @@ static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
 	fh->magic = OVL_FH_MAGIC;
 	fh->type = fh_type;
 	fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
+	/*
+	 * When we will want to decode an overlay dentry from this handle
+	 * and all layers are on the same fs, if we get a disconnected real
+	 * dentry when we decode fid, the only way to tell if we should assign
+	 * it to upperdentry or to lowerstack is by checking this flag.
+	 */
+	if (is_upper)
+		fh->flags |= OVL_FH_FLAG_PATH_UPPER;
 	fh->len = fh_len;
 	fh->uuid = *uuid;
 	memcpy(fh->fid, buf, buflen);
@@ -283,7 +292,6 @@ static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
 static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 			  struct dentry *upper)
 {
-	struct super_block *sb = lower->d_sb;
 	const struct ovl_fh *fh = NULL;
 	int err;
 
@@ -292,9 +300,8 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 	 * so we can use the overlay.origin xattr to distignuish between a copy
 	 * up and a pure upper inode.
 	 */
-	if (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
-	    !uuid_is_null(&sb->s_uuid)) {
-		fh = ovl_encode_fh(lower, &sb->s_uuid);
+	if (ovl_can_decode_fh(lower->d_sb)) {
+		fh = ovl_encode_fh(lower, false);
 		if (IS_ERR(fh))
 			return PTR_ERR(fh);
 	}
@@ -309,84 +316,156 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 	return err;
 }
 
-static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
-			      struct dentry *dentry, struct path *lowerpath,
-			      struct kstat *stat, const char *link,
-			      struct kstat *pstat, bool tmpfile)
+struct ovl_copy_up_ctx {
+	struct dentry *parent;
+	struct dentry *dentry;
+	struct path lowerpath;
+	struct kstat stat;
+	struct kstat pstat;
+	const char *link;
+	struct dentry *destdir;
+	struct qstr destname;
+	struct dentry *workdir;
+	bool tmpfile;
+	bool origin;
+};
+
+static int ovl_link_up(struct ovl_copy_up_ctx *c)
 {
-	struct inode *wdir = workdir->d_inode;
-	struct inode *udir = upperdir->d_inode;
-	struct dentry *newdentry = NULL;
-	struct dentry *upper = NULL;
-	struct dentry *temp = NULL;
 	int err;
+	struct dentry *upper;
+	struct dentry *upperdir = ovl_dentry_upper(c->parent);
+	struct inode *udir = d_inode(upperdir);
+
+	/* Mark parent "impure" because it may now contain non-pure upper */
+	err = ovl_set_impure(c->parent, upperdir);
+	if (err)
+		return err;
+
+	err = ovl_set_nlink_lower(c->dentry);
+	if (err)
+		return err;
+
+	inode_lock_nested(udir, I_MUTEX_PARENT);
+	upper = lookup_one_len(c->dentry->d_name.name, upperdir,
+			       c->dentry->d_name.len);
+	err = PTR_ERR(upper);
+	if (!IS_ERR(upper)) {
+		err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper,
+				  true);
+		dput(upper);
+
+		if (!err) {
+			/* Restore timestamps on parent (best effort) */
+			ovl_set_timestamps(upperdir, &c->pstat);
+			ovl_dentry_set_upper_alias(c->dentry);
+		}
+	}
+	inode_unlock(udir);
+	ovl_set_nlink_upper(c->dentry);
+
+	return err;
+}
+
+static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
+			    struct dentry **newdentry)
+{
+	int err;
+	struct dentry *upper;
+	struct inode *udir = d_inode(c->destdir);
+
+	upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
+	if (IS_ERR(upper))
+		return PTR_ERR(upper);
+
+	if (c->tmpfile)
+		err = ovl_do_link(temp, udir, upper, true);
+	else
+		err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
+
+	if (!err)
+		*newdentry = dget(c->tmpfile ? upper : temp);
+	dput(upper);
+
+	return err;
+}
+
+static int ovl_get_tmpfile(struct ovl_copy_up_ctx *c, struct dentry **tempp)
+{
+	int err;
+	struct dentry *temp;
 	const struct cred *old_creds = NULL;
 	struct cred *new_creds = NULL;
 	struct cattr cattr = {
 		/* Can't properly set mode on creation because of the umask */
-		.mode = stat->mode & S_IFMT,
-		.rdev = stat->rdev,
-		.link = link
+		.mode = c->stat.mode & S_IFMT,
+		.rdev = c->stat.rdev,
+		.link = c->link
 	};
 
-	err = security_inode_copy_up(dentry, &new_creds);
+	err = security_inode_copy_up(c->dentry, &new_creds);
 	if (err < 0)
 		goto out;
 
 	if (new_creds)
 		old_creds = override_creds(new_creds);
 
-	if (tmpfile)
-		temp = ovl_do_tmpfile(upperdir, stat->mode);
-	else
-		temp = ovl_lookup_temp(workdir);
-	err = 0;
-	if (IS_ERR(temp)) {
-		err = PTR_ERR(temp);
-		temp = NULL;
+	if (c->tmpfile) {
+		temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
+		if (IS_ERR(temp))
+			goto temp_err;
+	} else {
+		temp = ovl_lookup_temp(c->workdir);
+		if (IS_ERR(temp))
+			goto temp_err;
+
+		err = ovl_create_real(d_inode(c->workdir), temp, &cattr,
+				      NULL, true);
+		if (err) {
+			dput(temp);
+			goto out;
+		}
 	}
-
-	if (!err && !tmpfile)
-		err = ovl_create_real(wdir, temp, &cattr, NULL, true);
-
+	err = 0;
+	*tempp = temp;
+out:
 	if (new_creds) {
 		revert_creds(old_creds);
 		put_cred(new_creds);
 	}
 
-	if (err)
-		goto out;
+	return err;
 
-	if (S_ISREG(stat->mode)) {
+temp_err:
+	err = PTR_ERR(temp);
+	goto out;
+}
+
+static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
+{
+	int err;
+
+	if (S_ISREG(c->stat.mode)) {
 		struct path upperpath;
 
-		ovl_path_upper(dentry, &upperpath);
+		ovl_path_upper(c->dentry, &upperpath);
 		BUG_ON(upperpath.dentry != NULL);
 		upperpath.dentry = temp;
 
-		if (tmpfile) {
-			inode_unlock(udir);
-			err = ovl_copy_up_data(lowerpath, &upperpath,
-					       stat->size);
-			inode_lock_nested(udir, I_MUTEX_PARENT);
-		} else {
-			err = ovl_copy_up_data(lowerpath, &upperpath,
-					       stat->size);
-		}
-
+		err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size);
 		if (err)
-			goto out_cleanup;
+			return err;
 	}
 
-	err = ovl_copy_xattr(lowerpath->dentry, temp);
+	err = ovl_copy_xattr(c->lowerpath.dentry, temp);
 	if (err)
-		goto out_cleanup;
+		return err;
 
 	inode_lock(temp->d_inode);
-	err = ovl_set_attr(temp, stat);
+	err = ovl_set_attr(temp, &c->stat);
 	inode_unlock(temp->d_inode);
 	if (err)
-		goto out_cleanup;
+		return err;
 
 	/*
 	 * Store identifier of lower inode in upper inode xattr to
@@ -395,41 +474,48 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 	 * Don't set origin when we are breaking the association with a lower
 	 * hard link.
 	 */
-	if (S_ISDIR(stat->mode) || stat->nlink == 1) {
-		err = ovl_set_origin(dentry, lowerpath->dentry, temp);
+	if (c->origin) {
+		err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
 		if (err)
-			goto out_cleanup;
+			return err;
 	}
 
-	upper = lookup_one_len(dentry->d_name.name, upperdir,
-			       dentry->d_name.len);
-	if (IS_ERR(upper)) {
-		err = PTR_ERR(upper);
-		upper = NULL;
-		goto out_cleanup;
-	}
+	return 0;
+}
 
-	if (tmpfile)
-		err = ovl_do_link(temp, udir, upper, true);
-	else
-		err = ovl_do_rename(wdir, temp, udir, upper, 0);
+static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
+{
+	struct inode *udir = c->destdir->d_inode;
+	struct dentry *newdentry = NULL;
+	struct dentry *temp = NULL;
+	int err;
+
+	err = ovl_get_tmpfile(c, &temp);
+	if (err)
+		goto out;
+
+	err = ovl_copy_up_inode(c, temp);
 	if (err)
 		goto out_cleanup;
 
-	newdentry = dget(tmpfile ? upper : temp);
-	ovl_dentry_update(dentry, newdentry);
-	ovl_inode_update(d_inode(dentry), d_inode(newdentry));
+	if (c->tmpfile) {
+		inode_lock_nested(udir, I_MUTEX_PARENT);
+		err = ovl_install_temp(c, temp, &newdentry);
+		inode_unlock(udir);
+	} else {
+		err = ovl_install_temp(c, temp, &newdentry);
+	}
+	if (err)
+		goto out_cleanup;
 
-	/* Restore timestamps on parent (best effort) */
-	ovl_set_timestamps(upperdir, pstat);
+	ovl_inode_update(d_inode(c->dentry), newdentry);
 out:
 	dput(temp);
-	dput(upper);
 	return err;
 
 out_cleanup:
-	if (!tmpfile)
-		ovl_cleanup(wdir, temp);
+	if (!c->tmpfile)
+		ovl_cleanup(d_inode(c->workdir), temp);
 	goto out;
 }
 
@@ -442,78 +528,119 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
  * is possible that the copy up will lock the old parent.  At that point
  * the file will have already been copied up anyway.
  */
-static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
-			   struct path *lowerpath, struct kstat *stat)
+static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 {
-	DEFINE_DELAYED_CALL(done);
-	struct dentry *workdir = ovl_workdir(dentry);
 	int err;
-	struct kstat pstat;
-	struct path parentpath;
-	struct dentry *lowerdentry = lowerpath->dentry;
-	struct dentry *upperdir;
-	const char *link = NULL;
-	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
+	bool indexed = false;
 
-	if (WARN_ON(!workdir))
+	if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) &&
+	    c->stat.nlink > 1)
+		indexed = true;
+
+	if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed)
+		c->origin = true;
+
+	if (indexed) {
+		c->destdir = ovl_indexdir(c->dentry->d_sb);
+		err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
+		if (err)
+			return err;
+	} else {
+		/*
+		 * Mark parent "impure" because it may now contain non-pure
+		 * upper
+		 */
+		err = ovl_set_impure(c->parent, c->destdir);
+		if (err)
+			return err;
+	}
+
+	/* Should we copyup with O_TMPFILE or with workdir? */
+	if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
+		c->tmpfile = true;
+		err = ovl_copy_up_locked(c);
+	} else {
+		err = -EIO;
+		if (lock_rename(c->workdir, c->destdir) != NULL) {
+			pr_err("overlayfs: failed to lock workdir+upperdir\n");
+		} else {
+			err = ovl_copy_up_locked(c);
+			unlock_rename(c->workdir, c->destdir);
+		}
+	}
+
+	if (indexed) {
+		if (!err)
+			ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
+		kfree(c->destname.name);
+	} else if (!err) {
+		struct inode *udir = d_inode(c->destdir);
+
+		/* Restore timestamps on parent (best effort) */
+		inode_lock(udir);
+		ovl_set_timestamps(c->destdir, &c->pstat);
+		inode_unlock(udir);
+
+		ovl_dentry_set_upper_alias(c->dentry);
+	}
+
+	return err;
+}
+
+static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
+			   int flags)
+{
+	int err;
+	DEFINE_DELAYED_CALL(done);
+	struct path parentpath;
+	struct ovl_copy_up_ctx ctx = {
+		.parent = parent,
+		.dentry = dentry,
+		.workdir = ovl_workdir(dentry),
+	};
+
+	if (WARN_ON(!ctx.workdir))
 		return -EROFS;
 
-	ovl_do_check_copy_up(lowerdentry);
-
-	ovl_path_upper(parent, &parentpath);
-	upperdir = parentpath.dentry;
-
-	/* Mark parent "impure" because it may now contain non-pure upper */
-	err = ovl_set_impure(parent, upperdir);
+	ovl_path_lower(dentry, &ctx.lowerpath);
+	err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
+			  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
 	if (err)
 		return err;
 
-	err = vfs_getattr(&parentpath, &pstat,
+	ovl_path_upper(parent, &parentpath);
+	ctx.destdir = parentpath.dentry;
+	ctx.destname = dentry->d_name;
+
+	err = vfs_getattr(&parentpath, &ctx.pstat,
 			  STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
 	if (err)
 		return err;
 
-	if (S_ISLNK(stat->mode)) {
-		link = vfs_get_link(lowerdentry, &done);
-		if (IS_ERR(link))
-			return PTR_ERR(link);
+	/* maybe truncate regular file. this has no effect on dirs */
+	if (flags & O_TRUNC)
+		ctx.stat.size = 0;
+
+	if (S_ISLNK(ctx.stat.mode)) {
+		ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
+		if (IS_ERR(ctx.link))
+			return PTR_ERR(ctx.link);
 	}
+	ovl_do_check_copy_up(ctx.lowerpath.dentry);
 
-	/* Should we copyup with O_TMPFILE or with workdir? */
-	if (S_ISREG(stat->mode) && ofs->tmpfile) {
-		err = ovl_copy_up_start(dentry);
-		/* err < 0: interrupted, err > 0: raced with another copy-up */
-		if (unlikely(err)) {
-			pr_debug("ovl_copy_up_start(%pd2) = %i\n", dentry, err);
-			if (err > 0)
-				err = 0;
-			goto out_done;
-		}
-
-		inode_lock_nested(upperdir->d_inode, I_MUTEX_PARENT);
-		err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
-					 stat, link, &pstat, true);
-		inode_unlock(upperdir->d_inode);
+	err = ovl_copy_up_start(dentry);
+	/* err < 0: interrupted, err > 0: raced with another copy-up */
+	if (unlikely(err)) {
+		if (err > 0)
+			err = 0;
+	} else {
+		if (!ovl_dentry_upper(dentry))
+			err = ovl_do_copy_up(&ctx);
+		if (!err && !ovl_dentry_has_upper_alias(dentry))
+			err = ovl_link_up(&ctx);
 		ovl_copy_up_end(dentry);
-		goto out_done;
 	}
-
-	err = -EIO;
-	if (lock_rename(workdir, upperdir) != NULL) {
-		pr_err("overlayfs: failed to lock workdir+upperdir\n");
-		goto out_unlock;
-	}
-	if (ovl_dentry_upper(dentry)) {
-		/* Raced with another copy-up?  Nothing to do, then... */
-		err = 0;
-		goto out_unlock;
-	}
-
-	err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
-				 stat, link, &pstat, false);
-out_unlock:
-	unlock_rename(workdir, upperdir);
-out_done:
 	do_delayed_call(&done);
 
 	return err;
@@ -527,11 +654,22 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
 	while (!err) {
 		struct dentry *next;
 		struct dentry *parent;
-		struct path lowerpath;
-		struct kstat stat;
-		enum ovl_path_type type = ovl_path_type(dentry);
 
-		if (OVL_TYPE_UPPER(type))
+		/*
+		 * Check whether copy-up has already happened and whether the upper
+		 * alias (needed in case of hard links) is already there.
+		 *
+		 * Both checks are lockless:
+		 *  - false negatives: will recheck under oi->lock
+		 *  - false positives:
+		 *    + ovl_dentry_upper() uses memory barriers to ensure the
+		 *      upper dentry is up-to-date
+		 *    + ovl_dentry_has_upper_alias() relies on locking of
+		 *      upper parent i_rwsem to prevent reordering copy-up
+		 *      with rename.
+		 */
+		if (ovl_dentry_upper(dentry) &&
+		    ovl_dentry_has_upper_alias(dentry))
 			break;
 
 		next = dget(dentry);
@@ -539,22 +677,14 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
 		for (;;) {
 			parent = dget_parent(next);
 
-			type = ovl_path_type(parent);
-			if (OVL_TYPE_UPPER(type))
+			if (ovl_dentry_upper(parent))
 				break;
 
 			dput(next);
 			next = parent;
 		}
 
-		ovl_path_lower(next, &lowerpath);
-		err = vfs_getattr(&lowerpath, &stat,
-				  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
-		/* maybe truncate regular file. this has no effect on dirs */
-		if (flags & O_TRUNC)
-			stat.size = 0;
-		if (!err)
-			err = ovl_copy_up_one(parent, next, &lowerpath, &stat);
+		err = ovl_copy_up_one(parent, next, flags);
 
 		dput(parent);
 		dput(next);

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index a63a716..641d9ee 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c

@@ -24,7 +24,7 @@ module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
 MODULE_PARM_DESC(ovl_redirect_max,
 		 "Maximum length of absolute redirect xattr value");
 
-void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
+int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
 {
 	int err;
 
@@ -39,6 +39,8 @@ void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
 		pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
 		       wdentry, err);
 	}
+
+	return err;
 }
 
 struct dentry *ovl_lookup_temp(struct dentry *workdir)
@@ -154,12 +156,13 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
 			    struct dentry *newdentry, bool hardlink)
 {
 	ovl_dentry_version_inc(dentry->d_parent);
-	ovl_dentry_update(dentry, newdentry);
+	ovl_dentry_set_upper_alias(dentry);
 	if (!hardlink) {
-		ovl_inode_update(inode, d_inode(newdentry));
+		ovl_inode_update(inode, newdentry);
 		ovl_copyattr(newdentry->d_inode, inode);
 	} else {
-		WARN_ON(ovl_inode_real(inode, NULL) != d_inode(newdentry));
+		WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
+		dput(newdentry);
 		inc_nlink(inode);
 	}
 	d_instantiate(dentry, inode);
@@ -588,6 +591,7 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
 		    struct dentry *new)
 {
 	int err;
+	bool locked = false;
 	struct inode *inode;
 
 	err = ovl_want_write(old);
@@ -598,6 +602,10 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
 	if (err)
 		goto out_drop_write;
 
+	err = ovl_nlink_start(old, &locked);
+	if (err)
+		goto out_drop_write;
+
 	inode = d_inode(old);
 	ihold(inode);
 
@@ -605,12 +613,18 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
 	if (err)
 		iput(inode);
 
+	ovl_nlink_end(old, locked);
 out_drop_write:
 	ovl_drop_write(old);
 out:
 	return err;
 }
 
+/* Check whether the upper dentry of @dentry and @upper share one inode. */
+static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
+{
+	struct inode *cached = d_inode(ovl_dentry_upper(dentry));
+	struct inode *found = d_inode(upper);
+
+	return cached == found;
+}
+
 static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
 {
 	struct dentry *workdir = ovl_workdir(dentry);
@@ -646,7 +660,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
 	err = -ESTALE;
 	if ((opaquedir && upper != opaquedir) ||
 	    (!opaquedir && ovl_dentry_upper(dentry) &&
-	     upper != ovl_dentry_upper(dentry))) {
+	     !ovl_matches_upper(dentry, upper))) {
 		goto out_dput_upper;
 	}
 
@@ -707,7 +721,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
 
 	err = -ESTALE;
 	if ((opaquedir && upper != opaquedir) ||
-	    (!opaquedir && upper != ovl_dentry_upper(dentry)))
+	    (!opaquedir && !ovl_matches_upper(dentry, upper)))
 		goto out_dput_upper;
 
 	if (is_dir)
@@ -735,8 +749,8 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
 
 static int ovl_do_remove(struct dentry *dentry, bool is_dir)
 {
-	enum ovl_path_type type;
 	int err;
+	bool locked = false;
 	const struct cred *old_cred;
 
 	err = ovl_want_write(dentry);
@@ -747,7 +761,9 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
 	if (err)
 		goto out_drop_write;
 
-	type = ovl_path_type(dentry);
+	err = ovl_nlink_start(dentry, &locked);
+	if (err)
+		goto out_drop_write;
 
 	old_cred = ovl_override_creds(dentry->d_sb);
 	if (!ovl_lower_positive(dentry))
@@ -761,6 +777,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
 		else
 			drop_nlink(dentry->d_inode);
 	}
+	ovl_nlink_end(dentry, locked);
 out_drop_write:
 	ovl_drop_write(dentry);
 out:
@@ -883,6 +900,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 		      unsigned int flags)
 {
 	int err;
+	bool locked = false;
 	struct dentry *old_upperdir;
 	struct dentry *new_upperdir;
 	struct dentry *olddentry;
@@ -926,6 +944,10 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 		err = ovl_copy_up(new);
 		if (err)
 			goto out_drop_write;
+	} else {
+		err = ovl_nlink_start(new, &locked);
+		if (err)
+			goto out_drop_write;
 	}
 
 	old_cred = ovl_override_creds(old->d_sb);
@@ -985,7 +1007,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 		goto out_unlock;
 
 	err = -ESTALE;
-	if (olddentry != ovl_dentry_upper(old))
+	if (!ovl_matches_upper(old, olddentry))
 		goto out_dput_old;
 
 	newdentry = lookup_one_len(new->d_name.name, new_upperdir,
@@ -998,12 +1020,12 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 	new_opaque = ovl_dentry_is_opaque(new);
 
 	err = -ESTALE;
-	if (ovl_dentry_upper(new)) {
+	if (d_inode(new) && ovl_dentry_upper(new)) {
 		if (opaquedir) {
 			if (newdentry != opaquedir)
 				goto out_dput;
 		} else {
-			if (newdentry != ovl_dentry_upper(new))
+			if (!ovl_matches_upper(new, newdentry))
 				goto out_dput;
 		}
 	} else {
@@ -1046,6 +1068,13 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 	if (cleanup_whiteout)
 		ovl_cleanup(old_upperdir->d_inode, newdentry);
 
+	if (overwrite && d_inode(new)) {
+		if (new_is_dir)
+			clear_nlink(d_inode(new));
+		else
+			drop_nlink(d_inode(new));
+	}
+
 	ovl_dentry_version_inc(old->d_parent);
 	ovl_dentry_version_inc(new->d_parent);
 
@@ -1057,6 +1086,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 	unlock_rename(new_upperdir, old_upperdir);
 out_revert_creds:
 	revert_creds(old_cred);
+	ovl_nlink_end(new, locked);
 out_drop_write:
 	ovl_drop_write(old);
 out:

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index d613e2c..69f4fc2 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c

@@ -12,6 +12,7 @@
 #include <linux/cred.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
+#include <linux/ratelimit.h>
 #include "overlayfs.h"
 
 int ovl_setattr(struct dentry *dentry, struct iattr *attr)
@@ -96,11 +97,15 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 
 			WARN_ON_ONCE(stat->dev != lowerstat.dev);
 			/*
-			 * Lower hardlinks are broken on copy up to different
+			 * Lower hardlinks may be broken on copy up to different
 			 * upper files, so we cannot use the lower origin st_ino
 			 * for those different files, even for the same fs case.
+			 * With inodes index enabled, it is safe to use st_ino
+			 * of an indexed hardlinked origin. The index validates
+			 * that the upper hardlink is not broken.
 			 */
-			if (is_dir || lowerstat.nlink == 1)
+			if (is_dir || lowerstat.nlink == 1 ||
+			    ovl_test_flag(OVL_INDEX, d_inode(dentry)))
 				stat->ino = lowerstat.ino;
 		}
 		stat->dev = dentry->d_sb->s_dev;
@@ -126,6 +131,15 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 	if (is_dir && OVL_TYPE_MERGE(type))
 		stat->nlink = 1;
 
+	/*
+	 * Return the overlay inode nlinks for indexed upper inodes.
+	 * Overlay inode nlink counts the union of the upper hardlinks
+	 * and non-covered lower hardlinks. It does not include the upper
+	 * index hardlink.
+	 */
+	if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+		stat->nlink = dentry->d_inode->i_nlink;
+
 out:
 	revert_creds(old_cred);
 
@@ -134,8 +148,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 
 int ovl_permission(struct inode *inode, int mask)
 {
-	bool is_upper;
-	struct inode *realinode = ovl_inode_real(inode, &is_upper);
+	struct inode *upperinode = ovl_inode_upper(inode);
+	struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
 	const struct cred *old_cred;
 	int err;
 
@@ -154,7 +168,8 @@ int ovl_permission(struct inode *inode, int mask)
 		return err;
 
 	old_cred = ovl_override_creds(inode->i_sb);
-	if (!is_upper && !special_file(realinode->i_mode) && mask & MAY_WRITE) {
+	if (!upperinode &&
+	    !special_file(realinode->i_mode) && mask & MAY_WRITE) {
 		mask &= ~(MAY_WRITE | MAY_APPEND);
 		/* Make sure mounter can read file for copy up later */
 		mask |= MAY_READ;
@@ -286,7 +301,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
 
 struct posix_acl *ovl_get_acl(struct inode *inode, int type)
 {
-	struct inode *realinode = ovl_inode_real(inode, NULL);
+	struct inode *realinode = ovl_inode_real(inode);
 	const struct cred *old_cred;
 	struct posix_acl *acl;
 
@@ -300,13 +315,13 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type)
 	return acl;
 }
 
-static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
-				  struct dentry *realdentry)
+static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
 {
-	if (OVL_TYPE_UPPER(type))
+	if (ovl_dentry_upper(dentry) &&
+	    ovl_dentry_has_upper_alias(dentry))
 		return false;
 
-	if (special_file(realdentry->d_inode->i_mode))
+	if (special_file(d_inode(dentry)->i_mode))
 		return false;
 
 	if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
@@ -318,11 +333,8 @@ static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
 int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
 {
 	int err = 0;
-	struct path realpath;
-	enum ovl_path_type type;
 
-	type = ovl_path_real(dentry, &realpath);
-	if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) {
+	if (ovl_open_need_copy_up(dentry, file_flags)) {
 		err = ovl_want_write(dentry);
 		if (!err) {
 			err = ovl_copy_up_flags(dentry, file_flags);
@@ -440,6 +452,103 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
 	}
 }
 
+/*
+ * With inodes index enabled, an overlay inode nlink counts the union of upper
+ * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure
+ * upper inode, the following nlink modifying operations can happen:
+ *
+ * 1. Lower hardlink copy up
+ * 2. Upper hardlink created, unlinked or renamed over
+ * 3. Lower hardlink whiteout or renamed over
+ *
+ * For the first, copy up case, the union nlink does not change, whether the
+ * operation succeeds or fails, but the upper inode nlink may change.
+ * Therefore, before copy up, we store the union nlink value relative to the
+ * lower inode nlink in the index inode xattr trusted.overlay.nlink.
+ *
+ * For the second, upper hardlink case, the union nlink should be incremented
+ * or decremented IFF the operation succeeds, aligned with nlink change of the
+ * upper inode. Therefore, before link/unlink/rename, we store the union nlink
+ * value relative to the upper inode nlink in the index inode.
+ *
+ * For the last, lower cover up case, we simplify things by preceding the
+ * whiteout or cover up with copy up. This makes sure that there is an index
+ * upper inode where the nlink xattr can be stored before the copied up upper
+ * entry is unlinked.
+ */
+#define OVL_NLINK_ADD_UPPER	(1 << 0)
+
+/*
+ * On-disk format for indexed nlink:
+ *
+ * nlink relative to the upper inode - "U[+-]NUM"
+ * nlink relative to the lower inode - "L[+-]NUM"
+ */
+
+/*
+ * Format the difference between the overlay inode nlink and the nlink of
+ * @realdentry's inode using @format, and store the resulting string in the
+ * OVL_XATTR_NLINK xattr of the upper dentry of @dentry.
+ *
+ * Returns the result of ovl_do_setxattr().
+ */
+static int ovl_set_nlink_common(struct dentry *dentry,
+				struct dentry *realdentry, const char *format)
+{
+	char value[13];
+	int diff = (int) (d_inode(dentry)->i_nlink -
+			  d_inode(realdentry)->i_nlink);
+	int size = snprintf(value, sizeof(value), format, diff);
+
+	return ovl_do_setxattr(ovl_dentry_upper(dentry),
+			       OVL_XATTR_NLINK, value, size, 0);
+}
+
+/* Store the overlay nlink relative to the upper inode nlink ("U[+-]NUM"). */
+int ovl_set_nlink_upper(struct dentry *dentry)
+{
+	struct dentry *upperdentry = ovl_dentry_upper(dentry);
+
+	return ovl_set_nlink_common(dentry, upperdentry, "U%+i");
+}
+
+/* Store the overlay nlink relative to the lower inode nlink ("L[+-]NUM"). */
+int ovl_set_nlink_lower(struct dentry *dentry)
+{
+	struct dentry *lowerdentry = ovl_dentry_lower(dentry);
+
+	return ovl_set_nlink_common(dentry, lowerdentry, "L%+i");
+}
+
+/*
+ * Compute the overlay nlink of an inode from the OVL_XATTR_NLINK xattr
+ * stored on @upperdentry.
+ *
+ * The xattr value is "L[+-]NUM" or "U[+-]NUM": a signed difference relative
+ * to the nlink of the lower or of the upper inode respectively.
+ *
+ * @fallback is returned when either dentry is NULL, when the lower inode has
+ * a single link, or when the xattr cannot be read, cannot be parsed, or
+ * yields a non-positive nlink (the last three cases also log a warning).
+ */
+unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+			   struct dentry *upperdentry,
+			   unsigned int fallback)
+{
+	int nlink_diff;
+	int nlink;
+	char buf[13];
+	int err;
+
+	if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1)
+		return fallback;
+
+	/* Leave room for the terminating NUL added below */
+	err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, buf, sizeof(buf) - 1);
+	if (err < 0)
+		goto fail;
+
+	buf[err] = '\0';
+
+	/*
+	 * err held the xattr size up to here; replace it so that the warning
+	 * below reports a real error code for a malformed value.
+	 */
+	err = -EINVAL;
+	if ((buf[0] != 'L' && buf[0] != 'U') ||
+	    (buf[1] != '+' && buf[1] != '-'))
+		goto fail;
+
+	err = kstrtoint(buf + 1, 10, &nlink_diff);
+	if (err < 0)
+		goto fail;
+
+	nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink;
+	nlink += nlink_diff;
+
+	if (nlink <= 0) {
+		/* kstrtoint() left err at 0; report the bad result instead */
+		err = -EINVAL;
+		goto fail;
+	}
+
+	return nlink;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n",
+			    upperdentry, err);
+	return fallback;
+}
+
 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
 {
 	struct inode *inode;
@@ -453,27 +562,87 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
 
 static int ovl_inode_test(struct inode *inode, void *data)
 {
-	return ovl_inode_real(inode, NULL) == data;
+	return inode->i_private == data;
 }
 
 static int ovl_inode_set(struct inode *inode, void *data)
 {
-	inode->i_private = (void *) (((unsigned long) data) | OVL_ISUPPER_MASK);
+	inode->i_private = data;
 	return 0;
 }
 
-struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode)
-
+/*
+ * Check that an overlay inode refers to the same underlying inodes as the
+ * given lower and upper dentries.  Either dentry may be NULL.
+ *
+ * Returns true on match, false otherwise.
+ */
+static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
+			     struct dentry *upperdentry)
 {
+	struct inode *lowerinode = lowerdentry ? d_inode(lowerdentry) : NULL;
+
+	/* Lower (origin) inode must match, even if NULL */
+	if (ovl_inode_lower(inode) != lowerinode)
+		return false;
+
+	/*
+	 * Allow non-NULL __upperdentry in inode even if upperdentry is NULL.
+	 * This happens when finding a lower alias for a copied up hard link.
+	 */
+	if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry))
+		return false;
+
+	return true;
+}
+
+/*
+ * Get the overlay inode for @dentry, given its (possibly NULL) upper dentry.
+ *
+ * Non-directories that have an upper dentry, or that have a lower dentry
+ * while an index dir is in use, are looked up in the inode cache with
+ * iget5_locked(), keyed by the lower inode if there is one and by the upper
+ * inode otherwise.  Everything else gets a fresh inode from new_inode().
+ *
+ * When an already initialized inode is found in the cache it is verified
+ * against the dentries and the reference on @upperdentry is dropped.
+ *
+ * Returns the inode, ERR_PTR(-ESTALE) if a cached inode does not match the
+ * dentries, or ERR_PTR(-ENOMEM) if no inode could be allocated.
+ */
+struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry)
+{
+	struct dentry *lowerdentry = ovl_dentry_lower(dentry);
+	struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
 	struct inode *inode;
 
-	inode = iget5_locked(sb, (unsigned long) realinode,
-			     ovl_inode_test, ovl_inode_set, realinode);
-	if (inode && inode->i_state & I_NEW) {
-		ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
-		set_nlink(inode, realinode->i_nlink);
-		unlock_new_inode(inode);
-	}
+	if (!realinode)
+		realinode = d_inode(lowerdentry);
 
+	if (!S_ISDIR(realinode->i_mode) &&
+	    (upperdentry || (lowerdentry && ovl_indexdir(dentry->d_sb)))) {
+		struct inode *key = d_inode(lowerdentry ?: upperdentry);
+		unsigned int nlink;
+
+		inode = iget5_locked(dentry->d_sb, (unsigned long) key,
+				     ovl_inode_test, ovl_inode_set, key);
+		if (!inode)
+			goto out_nomem;
+		if (!(inode->i_state & I_NEW)) {
+			/*
+			 * Verify that the underlying files stored in the inode
+			 * match those in the dentry.
+			 */
+			if (!ovl_verify_inode(inode, lowerdentry, upperdentry)) {
+				iput(inode);
+				inode = ERR_PTR(-ESTALE);
+				goto out;
+			}
+
+			dput(upperdentry);
+			goto out;
+		}
+
+		nlink = ovl_get_nlink(lowerdentry, upperdentry,
+				      realinode->i_nlink);
+		set_nlink(inode, nlink);
+	} else {
+		inode = new_inode(dentry->d_sb);
+		if (!inode)
+			goto out_nomem;
+	}
+	ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
+	ovl_inode_init(inode, upperdentry, lowerdentry);
+
+	if (upperdentry && ovl_is_impuredir(upperdentry))
+		ovl_set_flag(OVL_IMPURE, inode);
+
+	if (inode->i_state & I_NEW)
+		unlock_new_inode(inode);
+out:
 	return inode;
+
+out_nomem:
+	inode = ERR_PTR(-ENOMEM);
+	goto out;
 }

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index de0d4f7..9bc0e58 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c

@@ -88,13 +88,10 @@ static int ovl_acceptable(void *ctx, struct dentry *dentry)
 	return 1;
 }
 
-static struct dentry *ovl_get_origin(struct dentry *dentry,
-				     struct vfsmount *mnt)
+static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry)
 {
 	int res;
 	struct ovl_fh *fh = NULL;
-	struct dentry *origin = NULL;
-	int bytes;
 
 	res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0);
 	if (res < 0) {
@@ -106,7 +103,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
 	if (res == 0)
 		return NULL;
 
-	fh  = kzalloc(res, GFP_TEMPORARY);
+	fh = kzalloc(res, GFP_TEMPORARY);
 	if (!fh)
 		return ERR_PTR(-ENOMEM);
 
@@ -129,7 +126,29 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
 	    (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
 		goto out;
 
-	bytes = (fh->len - offsetof(struct ovl_fh, fid));
+	return fh;
+
+out:
+	kfree(fh);
+	return NULL;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
+	goto out;
+invalid:
+	pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+	goto out;
+}
+
+static struct dentry *ovl_get_origin(struct dentry *dentry,
+				     struct vfsmount *mnt)
+{
+	struct dentry *origin = NULL;
+	struct ovl_fh *fh = ovl_get_origin_fh(dentry);
+	int bytes;
+
+	if (IS_ERR_OR_NULL(fh))
+		return (struct dentry *)fh;
 
 	/*
 	 * Make sure that the stored uuid matches the uuid of the lower
@@ -138,6 +157,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
 	if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
 		goto out;
 
+	bytes = (fh->len - offsetof(struct ovl_fh, fid));
 	origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
 				    bytes >> 2, (int)fh->type,
 				    ovl_acceptable, NULL);
@@ -149,21 +169,17 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
 	}
 
 	if (ovl_dentry_weird(origin) ||
-	    ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT)) {
-		dput(origin);
-		origin = NULL;
+	    ((d_inode(origin)->i_mode ^ d_inode(dentry)->i_mode) & S_IFMT))
 		goto invalid;
-	}
 
 out:
 	kfree(fh);
 	return origin;
 
-fail:
-	pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res);
-	goto out;
 invalid:
-	pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh);
+	pr_warn_ratelimited("overlayfs: invalid origin (%pd2)\n", origin);
+	dput(origin);
+	origin = NULL;
 	goto out;
 }
 
@@ -269,34 +285,31 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
 }
 
 
-static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry,
+static int ovl_check_origin(struct dentry *upperdentry,
+			    struct path *lowerstack, unsigned int numlower,
 			    struct path **stackp, unsigned int *ctrp)
 {
-	struct super_block *same_sb = ovl_same_sb(dentry->d_sb);
-	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
 	struct vfsmount *mnt;
-	struct dentry *origin;
+	struct dentry *origin = NULL;
+	int i;
 
-	if (!same_sb || !roe->numlower)
+
+	for (i = 0; i < numlower; i++) {
+		mnt = lowerstack[i].mnt;
+		origin = ovl_get_origin(upperdentry, mnt);
+		if (IS_ERR(origin))
+			return PTR_ERR(origin);
+
+		if (origin)
+			break;
+	}
+
+	if (!origin)
 		return 0;
 
-       /*
-	* Since all layers are on the same fs, we use the first layer for
-	* decoding the file handle.  We may get a disconnected dentry,
-	* which is fine, because we only need to hold the origin inode in
-	* cache and use its inode number.  We may even get a connected dentry,
-	* that is not under the first layer's root.  That is also fine for
-	* using it's inode number - it's the same as if we held a reference
-	* to a dentry in first layer that was moved under us.
-	*/
-	mnt = roe->lowerstack[0].mnt;
-
-	origin = ovl_get_origin(upperdentry, mnt);
-	if (IS_ERR_OR_NULL(origin))
-		return PTR_ERR(origin);
-
-	BUG_ON(*stackp || *ctrp);
-	*stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY);
+	BUG_ON(*ctrp);
+	if (!*stackp)
+		*stackp = kmalloc(sizeof(struct path), GFP_TEMPORARY);
 	if (!*stackp) {
 		dput(origin);
 		return -ENOMEM;
@@ -308,6 +321,215 @@ static int ovl_check_origin(struct dentry *dentry, struct dentry *upperdentry,
 }
 
 /*
+ * Verify that @fh matches the origin file handle stored in OVL_XATTR_ORIGIN.
+ * Return 0 on match, -ESTALE on mismatch, < 0 on error.
+ */
+static int ovl_verify_origin_fh(struct dentry *dentry, const struct ovl_fh *fh)
+{
+	struct ovl_fh *stored = ovl_get_origin_fh(dentry);
+	int ret;
+
+	/* A failure to read the stored file handle is passed up as is */
+	if (IS_ERR(stored))
+		return PTR_ERR(stored);
+
+	/* No usable origin file handle is stored on @dentry */
+	if (!stored)
+		return -ENODATA;
+
+	/* Handles match only if they have equal length and equal bytes */
+	if (fh->len == stored->len && !memcmp(fh, stored, fh->len))
+		ret = 0;
+	else
+		ret = -ESTALE;
+
+	kfree(stored);
+	return ret;
+}
+
+/*
+ * Verify that an inode matches the origin file handle stored in upper inode.
+ *
+ * If @set is true and there is no stored file handle, encode and store origin
+ * file handle in OVL_XATTR_ORIGIN.
+ *
+ * Return 0 on match, -ESTALE on mismatch, < 0 on error.
+ */
+int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
+		      struct dentry *origin, bool is_upper, bool set)
+{
+	/* NOTE: @mnt is not referenced anywhere in this function body */
+	struct inode *inode;
+	struct ovl_fh *fh;
+	int err;
+
+	fh = ovl_encode_fh(origin, is_upper);
+	err = PTR_ERR(fh);
+	if (IS_ERR(fh)) {
+		/* The fail path ends in kfree(fh): never hand it an ERR_PTR */
+		fh = NULL;
+		goto fail;
+	}
+
+	err = ovl_verify_origin_fh(dentry, fh);
+	/* Nothing stored yet: record the origin now if the caller asked to */
+	if (set && err == -ENODATA)
+		err = ovl_do_setxattr(dentry, OVL_XATTR_ORIGIN, fh, fh->len, 0);
+	if (err)
+		goto fail;
+
+out:
+	kfree(fh);
+	return err;
+
+fail:
+	inode = d_inode(origin);
+	pr_warn_ratelimited("overlayfs: failed to verify origin (%pd2, ino=%lu, err=%i)\n",
+			    origin, inode ? inode->i_ino : 0, err);
+	goto out;
+}
+
+/*
+ * Verify that an index entry name matches the origin file handle stored in
+ * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
+ * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
+ */
+int ovl_verify_index(struct dentry *index, struct path *lowerstack,
+		     unsigned int numlower)
+{
+	struct ovl_fh *fh = NULL;
+	size_t len;
+	struct path origin = { };
+	struct path *stack = &origin;
+	unsigned int ctr = 0;
+	int err;
+
+	/* Nothing to verify for a negative index dentry */
+	if (!d_inode(index))
+		return 0;
+
+	err = -EISDIR;
+	if (d_is_dir(index))
+		goto fail;
+
+	/* The name is a hex encoded file handle: two characters per byte */
+	err = -EINVAL;
+	if (index->d_name.len < sizeof(struct ovl_fh)*2)
+		goto fail;
+
+	err = -ENOMEM;
+	len = index->d_name.len / 2;
+	fh = kzalloc(len, GFP_TEMPORARY);
+	if (!fh)
+		goto fail;
+
+	/* The decoded handle must claim exactly the decoded length */
+	err = -EINVAL;
+	if (hex2bin((u8 *)fh, index->d_name.name, len) || len != fh->len)
+		goto fail;
+
+	err = ovl_verify_origin_fh(index, fh);
+	if (err)
+		goto fail;
+
+	/* Decode the origin into the on-stack path; none found means stale */
+	err = ovl_check_origin(index, lowerstack, numlower, &stack, &ctr);
+	if (!err && !ctr)
+		err = -ESTALE;
+	if (err)
+		goto fail;
+
+	/*
+	 * Check if index is orphan and don't warn before cleaning it.
+	 * ovl_get_nlink() takes the lower dentry first and reads the nlink
+	 * xattr from the upper dentry, which here is the index itself.
+	 */
+	if (d_inode(index)->i_nlink == 1 &&
+	    ovl_get_nlink(origin.dentry, index, 0) == 0)
+		err = -ENOENT;
+
+	dput(origin.dentry);
+out:
+	kfree(fh);
+	return err;
+
+fail:
+	pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, err=%i)\n",
+			    index, err);
+	goto out;
+}
+
+/*
+ * Lookup in indexdir for the index entry of a lower real inode or a copy up
+ * origin inode. The index entry name is the hex representation of the lower
+ * inode file handle.
+ *
+ * If the index dentry is negative, then either no lower aliases have been
+ * copied up yet, or aliases have been copied up in older kernels and are
+ * not indexed.
+ *
+ * If the index dentry for a copy up origin inode is positive, but points
+ * to an inode different than the upper inode, then either the upper inode
+ * has been copied up and not indexed or it was indexed, but since then
+ * index dir was cleared. Either way, that index cannot be used to identify
+ * the overlay inode.
+ */
+int ovl_get_index_name(struct dentry *origin, struct qstr *name)
+{
+	struct ovl_fh *fh = ovl_encode_fh(origin, false);
+	char *hex, *end;
+
+	if (IS_ERR(fh))
+		return PTR_ERR(fh);
+
+	/* Two hex characters per file handle byte */
+	hex = kzalloc(fh->len * 2, GFP_TEMPORARY);
+	if (!hex) {
+		kfree(fh);
+		return -ENOMEM;
+	}
+
+	/* On success the caller owns the buffer stored in @name */
+	end = bin2hex(hex, fh, fh->len);
+	*name = (struct qstr) QSTR_INIT(hex, end - hex);
+	kfree(fh);
+
+	return 0;
+}
+
+/*
+ * Look up the index entry of @origin in the index dir of @dentry's sb.
+ *
+ * @upper is the upper dentry found by lookup, or NULL.
+ *
+ * Returns the positive index dentry, NULL if there is no index entry, the
+ * error from building the index name or from the lookup itself, or
+ * ERR_PTR(-EIO) if the index is inconsistent with @upper (missing for a
+ * hard linked origin, or pointing to a different inode).
+ */
+static struct dentry *ovl_lookup_index(struct dentry *dentry,
+				       struct dentry *upper,
+				       struct dentry *origin)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	struct dentry *index;
+	struct qstr name;
+	int err;
+
+	err = ovl_get_index_name(origin, &name);
+	if (err)
+		return ERR_PTR(err);
+
+	index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
+	if (IS_ERR(index)) {
+		/* Report the lookup error, not the 0 left in err above */
+		err = PTR_ERR(index);
+		/* name is not NUL terminated: bound the print by its length */
+		pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
+				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
+				    d_inode(origin)->i_ino, (int)name.len,
+				    name.name, err);
+		goto out;
+	}
+
+	if (d_is_negative(index)) {
+		if (upper && d_inode(origin)->i_nlink > 1) {
+			pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n",
+					    d_inode(origin)->i_ino);
+			goto fail;
+		}
+
+		/* No index entry: hand back NULL rather than a negative dentry */
+		dput(index);
+		index = NULL;
+	} else if (upper && d_inode(index) != d_inode(upper)) {
+		pr_warn_ratelimited("overlayfs: wrong index found (index ino: %lu, upper ino: %lu).\n",
+				    d_inode(index)->i_ino,
+				    d_inode(upper)->i_ino);
+		goto fail;
+	}
+
+out:
+	kfree(name.name);
+	return index;
+
+fail:
+	dput(index);
+	index = ERR_PTR(-EIO);
+	goto out;
+}
+
+/*
  * Returns next layer in stack starting from top.
  * Returns -1 if this is the last layer.
  */
@@ -338,10 +560,10 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
 	struct path *stack = NULL;
 	struct dentry *upperdir, *upperdentry = NULL;
+	struct dentry *index = NULL;
 	unsigned int ctr = 0;
 	struct inode *inode = NULL;
 	bool upperopaque = false;
-	bool upperimpure = false;
 	char *upperredirect = NULL;
 	struct dentry *this;
 	unsigned int i;
@@ -359,7 +581,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		return ERR_PTR(-ENAMETOOLONG);
 
 	old_cred = ovl_override_creds(dentry->d_sb);
-	upperdir = ovl_upperdentry_dereference(poe);
+	upperdir = ovl_dentry_upper(dentry->d_parent);
 	if (upperdir) {
 		err = ovl_lookup_layer(upperdir, &d, &upperdentry);
 		if (err)
@@ -372,8 +594,18 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		}
 		if (upperdentry && !d.is_dir) {
 			BUG_ON(!d.stop || d.redirect);
-			err = ovl_check_origin(dentry, upperdentry,
-					       &stack, &ctr);
+			/*
+			 * Lookup copy up origin by decoding origin file handle.
+			 * We may get a disconnected dentry, which is fine,
+			 * because we only need to hold the origin inode in
+			 * cache and use its inode number.  We may even get a
+			 * connected dentry, that is not under any of the lower
+			 * layers root.  That is also fine for using its inode
+			 * number - it's the same as if we held a reference
+			 * to a dentry in lower layer that was moved under us.
+			 */
+			err = ovl_check_origin(upperdentry, roe->lowerstack,
+					       roe->numlower, &stack, &ctr);
 			if (err)
 				goto out;
 		}
@@ -386,8 +618,6 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 				poe = roe;
 		}
 		upperopaque = d.opaque;
-		if (upperdentry && d.is_dir)
-			upperimpure = ovl_is_impuredir(upperdentry);
 	}
 
 	if (!d.stop && poe->numlower) {
@@ -428,48 +658,56 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		}
 	}
 
+	/* Lookup index by lower inode and verify it matches upper inode */
+	if (ctr && !d.is_dir && ovl_indexdir(dentry->d_sb)) {
+		struct dentry *origin = stack[0].dentry;
+
+		index = ovl_lookup_index(dentry, upperdentry, origin);
+		if (IS_ERR(index)) {
+			err = PTR_ERR(index);
+			index = NULL;
+			goto out_put;
+		}
+	}
+
 	oe = ovl_alloc_entry(ctr);
 	err = -ENOMEM;
 	if (!oe)
 		goto out_put;
 
+	oe->opaque = upperopaque;
+	memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
+	dentry->d_fsdata = oe;
+
+	if (upperdentry)
+		ovl_dentry_set_upper_alias(dentry);
+	else if (index)
+		upperdentry = dget(index);
+
 	if (upperdentry || ctr) {
-		struct dentry *realdentry;
-		struct inode *realinode;
-
-		realdentry = upperdentry ? upperdentry : stack[0].dentry;
-		realinode = d_inode(realdentry);
-
-		err = -ENOMEM;
-		if (upperdentry && !d_is_dir(upperdentry)) {
-			inode = ovl_get_inode(dentry->d_sb, realinode);
-		} else {
-			inode = ovl_new_inode(dentry->d_sb, realinode->i_mode,
-					      realinode->i_rdev);
-			if (inode)
-				ovl_inode_init(inode, realinode, !!upperdentry);
-		}
-		if (!inode)
+		inode = ovl_get_inode(dentry, upperdentry);
+		err = PTR_ERR(inode);
+		if (IS_ERR(inode))
 			goto out_free_oe;
-		ovl_copyattr(realdentry->d_inode, inode);
+
+		OVL_I(inode)->redirect = upperredirect;
+		if (index)
+			ovl_set_flag(OVL_INDEX, inode);
 	}
 
 	revert_creds(old_cred);
-	oe->opaque = upperopaque;
-	oe->impure = upperimpure;
-	oe->redirect = upperredirect;
-	oe->__upperdentry = upperdentry;
-	memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
+	dput(index);
 	kfree(stack);
 	kfree(d.redirect);
-	dentry->d_fsdata = oe;
 	d_add(dentry, inode);
 
 	return NULL;
 
 out_free_oe:
+	dentry->d_fsdata = NULL;
 	kfree(oe);
 out_put:
+	dput(index);
 	for (i = 0; i < ctr; i++)
 		dput(stack[i].dentry);
 	kfree(stack);
@@ -499,7 +737,7 @@ bool ovl_lower_positive(struct dentry *dentry)
 		return oe->opaque;
 
 	/* Negative upper -> positive lower */
-	if (!oe->__upperdentry)
+	if (!ovl_dentry_upper(dentry))
 		return true;
 
 	/* Positive upper -> have to look up lower to see whether it exists */

diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 10863b4..60d2660 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h

@@ -25,6 +25,12 @@ enum ovl_path_type {
 #define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
 #define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
 #define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
+#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
+
+/* Flags of an overlay inode, used with ovl_set_flag() and ovl_test_flag() */
+enum ovl_flag {
+	/* Set by ovl_get_inode() when the upper dentry is an impure dir */
+	OVL_IMPURE,
+	/* Set by ovl_lookup() when an index entry was found for the inode */
+	OVL_INDEX,
+};
 
 /*
  * The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
@@ -38,6 +44,8 @@ enum ovl_path_type {
 /* CPU byte order required for fid decoding:  */
 #define OVL_FH_FLAG_BIG_ENDIAN	(1 << 0)
 #define OVL_FH_FLAG_ANY_ENDIAN	(1 << 1)
+/* Is the real inode encoded in fid an upper inode? */
+#define OVL_FH_FLAG_PATH_UPPER	(1 << 2)
 
 #define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN)
 
@@ -60,8 +68,6 @@ struct ovl_fh {
 	u8 fid[0];	/* file identifier */
 } __packed;
 
-#define OVL_ISUPPER_MASK 1UL
-
 static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	int err = vfs_rmdir(dir, dentry);
@@ -175,22 +181,14 @@ static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
 	return ret;
 }
 
-static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper)
-{
-	unsigned long x = (unsigned long) READ_ONCE(inode->i_private);
-
-	if (is_upper)
-		*is_upper = x & OVL_ISUPPER_MASK;
-
-	return (struct inode *) (x & ~OVL_ISUPPER_MASK);
-}
-
 /* util.c */
 int ovl_want_write(struct dentry *dentry);
 void ovl_drop_write(struct dentry *dentry);
 struct dentry *ovl_workdir(struct dentry *dentry);
 const struct cred *ovl_override_creds(struct super_block *sb);
 struct super_block *ovl_same_sb(struct super_block *sb);
+bool ovl_can_decode_fh(struct super_block *sb);
+struct dentry *ovl_indexdir(struct super_block *sb);
 struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
 bool ovl_dentry_remote(struct dentry *dentry);
 bool ovl_dentry_weird(struct dentry *dentry);
@@ -201,19 +199,22 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
 struct dentry *ovl_dentry_upper(struct dentry *dentry);
 struct dentry *ovl_dentry_lower(struct dentry *dentry);
 struct dentry *ovl_dentry_real(struct dentry *dentry);
+struct inode *ovl_inode_upper(struct inode *inode);
+struct inode *ovl_inode_lower(struct inode *inode);
+struct inode *ovl_inode_real(struct inode *inode);
 struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
 void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
 bool ovl_dentry_is_opaque(struct dentry *dentry);
-bool ovl_dentry_is_impure(struct dentry *dentry);
 bool ovl_dentry_is_whiteout(struct dentry *dentry);
 void ovl_dentry_set_opaque(struct dentry *dentry);
+bool ovl_dentry_has_upper_alias(struct dentry *dentry);
+void ovl_dentry_set_upper_alias(struct dentry *dentry);
 bool ovl_redirect_dir(struct super_block *sb);
 const char *ovl_dentry_get_redirect(struct dentry *dentry);
 void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
-void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
-void ovl_inode_init(struct inode *inode, struct inode *realinode,
-		    bool is_upper);
-void ovl_inode_update(struct inode *inode, struct inode *upperinode);
+void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
+		    struct dentry *lowerdentry);
+void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
 void ovl_dentry_version_inc(struct dentry *dentry);
 u64 ovl_dentry_version_get(struct dentry *dentry);
 bool ovl_is_whiteout(struct dentry *dentry);
@@ -225,6 +226,12 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
 		       const char *name, const void *value, size_t size,
 		       int xerr);
 int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
+void ovl_set_flag(unsigned long flag, struct inode *inode);
+bool ovl_test_flag(unsigned long flag, struct inode *inode);
+bool ovl_inuse_trylock(struct dentry *dentry);
+void ovl_inuse_unlock(struct dentry *dentry);
+int ovl_nlink_start(struct dentry *dentry, bool *locked);
+void ovl_nlink_end(struct dentry *dentry, bool locked);
 
 static inline bool ovl_is_impuredir(struct dentry *dentry)
 {
@@ -233,6 +240,11 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
 
 
 /* namei.c */
+int ovl_verify_origin(struct dentry *dentry, struct vfsmount *mnt,
+		      struct dentry *origin, bool is_upper, bool set);
+int ovl_verify_index(struct dentry *index, struct path *lowerstack,
+		     unsigned int numlower);
+int ovl_get_index_name(struct dentry *origin, struct qstr *name);
 int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags);
 bool ovl_lower_positive(struct dentry *dentry);
@@ -245,8 +257,15 @@ void ovl_cache_free(struct list_head *list);
 int ovl_check_d_type_supported(struct path *realpath);
 void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
 			 struct dentry *dentry, int level);
+int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
+			 struct path *lowerstack, unsigned int numlower);
 
 /* inode.c */
+int ovl_set_nlink_upper(struct dentry *dentry);
+int ovl_set_nlink_lower(struct dentry *dentry);
+unsigned int ovl_get_nlink(struct dentry *lowerdentry,
+			   struct dentry *upperdentry,
+			   unsigned int fallback);
 int ovl_setattr(struct dentry *dentry, struct iattr *attr);
 int ovl_getattr(const struct path *path, struct kstat *stat,
 		u32 request_mask, unsigned int flags);
@@ -262,7 +281,7 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags);
 bool ovl_is_private_xattr(const char *name);
 
 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
-struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode);
+struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry);
 static inline void ovl_copyattr(struct inode *from, struct inode *to)
 {
 	to->i_uid = from->i_uid;
@@ -284,10 +303,11 @@ struct cattr {
 int ovl_create_real(struct inode *dir, struct dentry *newdentry,
 		    struct cattr *attr,
 		    struct dentry *hardlink, bool debug);
-void ovl_cleanup(struct inode *dir, struct dentry *dentry);
+int ovl_cleanup(struct inode *dir, struct dentry *dentry);
 
 /* copy_up.c */
 int ovl_copy_up(struct dentry *dentry);
 int ovl_copy_up_flags(struct dentry *dentry, int flags);
 int ovl_copy_xattr(struct dentry *old, struct dentry *new);
 int ovl_set_attr(struct dentry *upper, struct kstat *stat);
+struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper);

diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 34bc4a9..878a750 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h

@@ -14,6 +14,7 @@ struct ovl_config {
 	char *workdir;
 	bool default_permissions;
 	bool redirect_dir;
+	bool index;
 };
 
 /* private information held for overlayfs's superblock */
@@ -21,7 +22,12 @@ struct ovl_fs {
 	struct vfsmount *upper_mnt;
 	unsigned numlower;
 	struct vfsmount **lower_mnt;
+	/* workbasedir is the path at workdir= mount option */
+	struct dentry *workbasedir;
+	/* workdir is the 'work' directory under workbasedir */
 	struct dentry *workdir;
+	/* index directory listing overlay inodes by origin file handle */
+	struct dentry *indexdir;
 	long namelen;
 	/* pathnames of lower and upper dirs, for show_options */
 	struct ovl_config config;
@@ -29,22 +35,16 @@ struct ovl_fs {
 	const struct cred *creator_cred;
 	bool tmpfile;
 	bool noxattr;
-	wait_queue_head_t copyup_wq;
 	/* sb common to all layers */
 	struct super_block *same_sb;
 };
 
 /* private information held for every overlayfs dentry */
 struct ovl_entry {
-	struct dentry *__upperdentry;
-	struct ovl_dir_cache *cache;
 	union {
 		struct {
-			u64 version;
-			const char *redirect;
+			unsigned long has_upper;
 			bool opaque;
-			bool impure;
-			bool copying;
 		};
 		struct rcu_head rcu;
 	};
@@ -54,7 +54,25 @@ struct ovl_entry {
 
 struct ovl_entry *ovl_alloc_entry(unsigned int numlower);
 
-static inline struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
+/*
+ * Overlay-private per-inode state.  The VFS inode is embedded in this
+ * structure, so OVL_I() can get back to it with container_of().
+ */
+struct ovl_inode {
+	/* directory cache, accessed via ovl_dir_cache()/ovl_set_dir_cache() */
+	struct ovl_dir_cache *cache;
+	/* redirect string; kfree()d when replaced or when the inode is destroyed */
+	const char *redirect;
+	/* counter bumped by ovl_dentry_version_inc() */
+	u64 version;
+	/* bit flags used with ovl_set_flag()/ovl_test_flag(), e.g. OVL_IMPURE */
+	unsigned long flags;
+	/* embedded VFS inode */
+	struct inode vfs_inode;
+	/* upper dentry; read it through ovl_upperdentry_dereference() */
+	struct dentry *__upperdentry;
+	/* lower inode, see ovl_inode_lower() */
+	struct inode *lower;
+
+	/* synchronize copy up and more */
+	struct mutex lock;
+};
+
+static inline struct ovl_inode *OVL_I(struct inode *inode)
 {
-	return lockless_dereference(oe->__upperdentry);
+	return container_of(inode, struct ovl_inode, vfs_inode);
+}
+
+static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi)
+{
+	return lockless_dereference(oi->__upperdentry);
 }

diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index f241b4e..0298463 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c

@@ -667,3 +667,53 @@ void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
 		ovl_cleanup(dir, dentry);
 	}
 }
+
+/*
+ * Walk the index directory and remove every entry for which
+ * ovl_verify_index() reports a problem.
+ *
+ * @dentry:     the index directory
+ * @mnt:        mount paired with @dentry to form the path that is read
+ * @lowerstack: lower layer paths handed to ovl_verify_index()
+ * @numlower:   number of entries in @lowerstack
+ *
+ * Returns 0 on success, otherwise the first error hit while reading the
+ * directory, looking up an entry or removing one.  The error is logged.
+ */
+int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
+			 struct path *lowerstack, unsigned int numlower)
+{
+	int err;
+	struct inode *dir = dentry->d_inode;
+	struct path path = { .mnt = mnt, .dentry = dentry };
+	LIST_HEAD(list);
+	struct ovl_cache_entry *p;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_fill_merge,
+		.dentry = NULL,
+		.list = &list,
+		.root = RB_ROOT,
+		.is_lowest = false,
+	};
+
+	err = ovl_dir_read(&path, &rdd);
+	if (err)
+		goto out;
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	list_for_each_entry(p, &list, l_node) {
+		struct dentry *index;
+
+		/* Skip the "." and ".." entries */
+		if (p->name[0] == '.') {
+			if (p->len == 1)
+				continue;
+			if (p->len == 2 && p->name[1] == '.')
+				continue;
+		}
+		index = lookup_one_len(p->name, dentry, p->len);
+		if (IS_ERR(index)) {
+			err = PTR_ERR(index);
+			break;
+		}
+		/* Entries that fail verification are removed */
+		if (ovl_verify_index(index, lowerstack, numlower))
+			err = ovl_cleanup(dir, index);
+		/*
+		 * Drop the lookup reference before checking err, so that a
+		 * failed ovl_cleanup() does not leak the dentry.
+		 */
+		dput(index);
+		if (err)
+			break;
+	}
+	inode_unlock(dir);
+out:
+	ovl_cache_free(&list);
+	if (err)
+		pr_err("overlayfs: failed index dir cleanup (%i)\n", err);
+	return err;
+}

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 4882ffb..44dc2d6 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c

@@ -34,6 +34,11 @@ module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
 MODULE_PARM_DESC(ovl_redirect_dir_def,
 		 "Default to on or off for the redirect_dir feature");
 
+static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
+module_param_named(index, ovl_index_def, bool, 0644);
+MODULE_PARM_DESC(ovl_index_def,
+		 "Default to on or off for the inodes index feature");
+
 static void ovl_dentry_release(struct dentry *dentry)
 {
 	struct ovl_entry *oe = dentry->d_fsdata;
@@ -41,8 +46,6 @@ static void ovl_dentry_release(struct dentry *dentry)
 	if (oe) {
 		unsigned int i;
 
-		dput(oe->__upperdentry);
-		kfree(oe->redirect);
 		for (i = 0; i < oe->numlower; i++)
 			dput(oe->lowerstack[i].dentry);
 		kfree_rcu(oe, rcu);
@@ -165,12 +168,52 @@ static const struct dentry_operations ovl_reval_dentry_operations = {
 	.d_weak_revalidate = ovl_dentry_weak_revalidate,
 };
 
+static struct kmem_cache *ovl_inode_cachep;
+
+/*
+ * Allocate an ovl_inode from ovl_inode_cachep and reset its
+ * overlay-private fields.
+ *
+ * Returns the embedded VFS inode, or NULL when the slab allocation fails.
+ */
+static struct inode *ovl_alloc_inode(struct super_block *sb)
+{
+	struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
+
+	/* kmem_cache_alloc() can fail; do not touch the fields of a NULL oi */
+	if (!oi)
+		return NULL;
+
+	oi->cache = NULL;
+	oi->redirect = NULL;
+	oi->version = 0;
+	oi->flags = 0;
+	oi->__upperdentry = NULL;
+	oi->lower = NULL;
+	mutex_init(&oi->lock);
+
+	return &oi->vfs_inode;
+}
+
+/*
+ * RCU callback: map the rcu_head back to its inode and return the
+ * enclosing ovl_inode to ovl_inode_cachep.
+ */
+static void ovl_i_callback(struct rcu_head *head)
+{
+	struct inode *vfs_inode = container_of(head, struct inode, i_rcu);
+	struct ovl_inode *oi = OVL_I(vfs_inode);
+
+	kmem_cache_free(ovl_inode_cachep, oi);
+}
+
+/*
+ * Release the overlay-private state of @inode, then hand the inode to
+ * ovl_i_callback() through call_rcu() for the actual free.
+ */
+static void ovl_destroy_inode(struct inode *inode)
+{
+	struct ovl_inode *oi = OVL_I(inode);
+
+	/* Put the upper dentry and free the redirect string */
+	dput(oi->__upperdentry);
+	kfree(oi->redirect);
+	mutex_destroy(&oi->lock);
+
+	/* kmem_cache_free() happens in the RCU callback, not here */
+	call_rcu(&inode->i_rcu, ovl_i_callback);
+}
+
 static void ovl_put_super(struct super_block *sb)
 {
 	struct ovl_fs *ufs = sb->s_fs_info;
 	unsigned i;
 
+	dput(ufs->indexdir);
 	dput(ufs->workdir);
+	ovl_inuse_unlock(ufs->workbasedir);
+	dput(ufs->workbasedir);
+	if (ufs->upper_mnt)
+		ovl_inuse_unlock(ufs->upper_mnt->mnt_root);
 	mntput(ufs->upper_mnt);
 	for (i = 0; i < ufs->numlower; i++)
 		mntput(ufs->lower_mnt[i]);
@@ -228,6 +271,12 @@ static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return err;
 }
 
+/*
+ * Tell whether this overlay will be forced to mount/remount read-only:
+ * true when it has no upper mount or no work directory.
+ */
+static bool ovl_force_readonly(struct ovl_fs *ufs)
+{
+	if (!ufs->upper_mnt)
+		return true;
+
+	return !ufs->workdir;
+}
+
 /**
  * ovl_show_options
  *
@@ -249,6 +298,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 	if (ufs->config.redirect_dir != ovl_redirect_dir_def)
 		seq_printf(m, ",redirect_dir=%s",
 			   ufs->config.redirect_dir ? "on" : "off");
+	if (ufs->config.index != ovl_index_def)
+		seq_printf(m, ",index=%s",
+			   ufs->config.index ? "on" : "off");
 	return 0;
 }
 
@@ -256,19 +308,21 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct ovl_fs *ufs = sb->s_fs_info;
 
-	if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir))
+	if (!(*flags & MS_RDONLY) && ovl_force_readonly(ufs))
 		return -EROFS;
 
 	return 0;
 }
 
 static const struct super_operations ovl_super_operations = {
+	.alloc_inode	= ovl_alloc_inode,
+	.destroy_inode	= ovl_destroy_inode,
+	.drop_inode	= generic_delete_inode,
 	.put_super	= ovl_put_super,
 	.sync_fs	= ovl_sync_fs,
 	.statfs		= ovl_statfs,
 	.show_options	= ovl_show_options,
 	.remount_fs	= ovl_remount,
-	.drop_inode	= generic_delete_inode,
 };
 
 enum {
@@ -278,6 +332,8 @@ enum {
 	OPT_DEFAULT_PERMISSIONS,
 	OPT_REDIRECT_DIR_ON,
 	OPT_REDIRECT_DIR_OFF,
+	OPT_INDEX_ON,
+	OPT_INDEX_OFF,
 	OPT_ERR,
 };
 
@@ -288,6 +344,8 @@ static const match_table_t ovl_tokens = {
 	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
 	{OPT_REDIRECT_DIR_ON,		"redirect_dir=on"},
 	{OPT_REDIRECT_DIR_OFF,		"redirect_dir=off"},
+	{OPT_INDEX_ON,			"index=on"},
+	{OPT_INDEX_OFF,			"index=off"},
 	{OPT_ERR,			NULL}
 };
 
@@ -360,6 +418,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 			config->redirect_dir = false;
 			break;
 
+		case OPT_INDEX_ON:
+			config->index = true;
+			break;
+
+		case OPT_INDEX_OFF:
+			config->index = false;
+			break;
+
 		default:
 			pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
 			return -EINVAL;
@@ -378,23 +444,29 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 }
 
 #define OVL_WORKDIR_NAME "work"
+#define OVL_INDEXDIR_NAME "index"
 
-static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
-					 struct dentry *dentry)
+static struct dentry *ovl_workdir_create(struct super_block *sb,
+					 struct ovl_fs *ufs,
+					 struct dentry *dentry,
+					 const char *name, bool persist)
 {
 	struct inode *dir = dentry->d_inode;
+	struct vfsmount *mnt = ufs->upper_mnt;
 	struct dentry *work;
 	int err;
 	bool retried = false;
+	bool locked = false;
 
 	err = mnt_want_write(mnt);
 	if (err)
-		return ERR_PTR(err);
+		goto out_err;
 
 	inode_lock_nested(dir, I_MUTEX_PARENT);
+	locked = true;
+
 retry:
-	work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
-			      strlen(OVL_WORKDIR_NAME));
+	work = lookup_one_len(name, dentry, strlen(name));
 
 	if (!IS_ERR(work)) {
 		struct iattr attr = {
@@ -407,6 +479,9 @@ static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
 			if (retried)
 				goto out_dput;
 
+			if (persist)
+				goto out_unlock;
+
 			retried = true;
 			ovl_workdir_cleanup(dir, mnt, work, 0);
 			dput(work);
@@ -446,16 +521,24 @@ static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
 		inode_unlock(work->d_inode);
 		if (err)
 			goto out_dput;
+	} else {
+		err = PTR_ERR(work);
+		goto out_err;
 	}
 out_unlock:
-	inode_unlock(dir);
 	mnt_drop_write(mnt);
+	if (locked)
+		inode_unlock(dir);
 
 	return work;
 
 out_dput:
 	dput(work);
-	work = ERR_PTR(err);
+out_err:
+	pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
+		ufs->config.workdir, name, -err);
+	sb->s_flags |= MS_RDONLY;
+	work = NULL;
 	goto out_unlock;
 }
 
@@ -555,6 +638,15 @@ static int ovl_lower_dir(const char *name, struct path *path,
 	if (ovl_dentry_remote(path->dentry))
 		*remote = true;
 
+	/*
+	 * The inodes index feature needs to encode and decode file
+	 * handles, so it requires that all layers support them.
+	 */
+	if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) {
+		ofs->config.index = false;
+		pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name);
+	}
+
 	return 0;
 
 out_put:
@@ -610,7 +702,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
 			size_t size, int flags)
 {
 	struct dentry *workdir = ovl_workdir(dentry);
-	struct inode *realinode = ovl_inode_real(inode, NULL);
+	struct inode *realinode = ovl_inode_real(inode);
 	struct posix_acl *acl = NULL;
 	int err;
 
@@ -652,7 +744,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
 
 	err = ovl_xattr_set(dentry, handler->name, value, size, flags);
 	if (!err)
-		ovl_copyattr(ovl_inode_real(inode, NULL), inode);
+		ovl_copyattr(ovl_inode_real(inode), inode);
 
 	return err;
 
@@ -734,7 +826,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	struct path upperpath = { };
 	struct path workpath = { };
 	struct dentry *root_dentry;
-	struct inode *realinode;
 	struct ovl_entry *oe;
 	struct ovl_fs *ufs;
 	struct path *stack = NULL;
@@ -752,8 +843,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	if (!ufs)
 		goto out;
 
-	init_waitqueue_head(&ufs->copyup_wq);
 	ufs->config.redirect_dir = ovl_redirect_dir_def;
+	ufs->config.index = ovl_index_def;
 	err = ovl_parse_opt((char *) data, &ufs->config);
 	if (err)
 		goto out_free_config;
@@ -788,9 +879,15 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		if (err)
 			goto out_put_upperpath;
 
+		err = -EBUSY;
+		if (!ovl_inuse_trylock(upperpath.dentry)) {
+			pr_err("overlayfs: upperdir is in-use by another mount\n");
+			goto out_put_upperpath;
+		}
+
 		err = ovl_mount_dir(ufs->config.workdir, &workpath);
 		if (err)
-			goto out_put_upperpath;
+			goto out_unlock_upperdentry;
 
 		err = -EINVAL;
 		if (upperpath.mnt != workpath.mnt) {
@@ -801,12 +898,20 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 			pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
 			goto out_put_workpath;
 		}
+
+		err = -EBUSY;
+		if (!ovl_inuse_trylock(workpath.dentry)) {
+			pr_err("overlayfs: workdir is in-use by another mount\n");
+			goto out_put_workpath;
+		}
+
+		ufs->workbasedir = workpath.dentry;
 		sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth;
 	}
 	err = -ENOMEM;
 	lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL);
 	if (!lowertmp)
-		goto out_put_workpath;
+		goto out_unlock_workdentry;
 
 	err = -EINVAL;
 	stacklen = ovl_split_lowerdirs(lowertmp);
@@ -849,20 +954,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 			pr_err("overlayfs: failed to clone upperpath\n");
 			goto out_put_lowerpath;
 		}
+
 		/* Don't inherit atime flags */
 		ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
 
 		sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran;
 
-		ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
-		err = PTR_ERR(ufs->workdir);
-		if (IS_ERR(ufs->workdir)) {
-			pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
-				ufs->config.workdir, OVL_WORKDIR_NAME, -err);
-			sb->s_flags |= MS_RDONLY;
-			ufs->workdir = NULL;
-		}
-
+		ufs->workdir = ovl_workdir_create(sb, ufs, workpath.dentry,
+						  OVL_WORKDIR_NAME, false);
 		/*
 		 * Upper should support d_type, else whiteouts are visible.
 		 * Given workdir and upper are on same fs, we can do
@@ -904,6 +1003,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 			} else {
 				vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE);
 			}
+
+			/* Check if upper/work fs supports file handles */
+			if (ufs->config.index &&
+			    !ovl_can_decode_fh(ufs->workdir->d_sb)) {
+				ufs->config.index = false;
+				pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
+			}
 		}
 	}
 
@@ -941,6 +1047,44 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	else if (ufs->upper_mnt->mnt_sb != ufs->same_sb)
 		ufs->same_sb = NULL;
 
+	if (!(ovl_force_readonly(ufs)) && ufs->config.index) {
+		/* Verify lower root is upper root origin */
+		err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0],
+					stack[0].dentry, false, true);
+		if (err) {
+			pr_err("overlayfs: failed to verify upper root origin\n");
+			goto out_put_lower_mnt;
+		}
+
+		ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry,
+						   OVL_INDEXDIR_NAME, true);
+		err = PTR_ERR(ufs->indexdir);
+		if (IS_ERR(ufs->indexdir))
+			goto out_put_lower_mnt;
+
+		if (ufs->indexdir) {
+			/* Verify upper root is index dir origin */
+			err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt,
+						upperpath.dentry, true, true);
+			if (err)
+				pr_err("overlayfs: failed to verify index dir origin\n");
+
+			/* Cleanup bad/stale/orphan index entries */
+			if (!err)
+				err = ovl_indexdir_cleanup(ufs->indexdir,
+							   ufs->upper_mnt,
+							   stack, numlower);
+		}
+		if (err || !ufs->indexdir)
+			pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
+		if (err)
+			goto out_put_indexdir;
+	}
+
+	/* Show index=off/on in /proc/mounts for any of the reasons above */
+	if (!ufs->indexdir)
+		ufs->config.index = false;
+
 	if (remote)
 		sb->s_d_op = &ovl_reval_dentry_operations;
 	else
@@ -948,7 +1092,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 	ufs->creator_cred = cred = prepare_creds();
 	if (!cred)
-		goto out_put_lower_mnt;
+		goto out_put_indexdir;
 
 	/* Never override disk quota limits or use reserved space */
 	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
@@ -971,12 +1115,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	mntput(upperpath.mnt);
 	for (i = 0; i < numlower; i++)
 		mntput(stack[i].mnt);
-	path_put(&workpath);
+	mntput(workpath.mnt);
 	kfree(lowertmp);
 
 	if (upperpath.dentry) {
-		oe->__upperdentry = upperpath.dentry;
-		oe->impure = ovl_is_impuredir(upperpath.dentry);
+		oe->has_upper = true;
+		if (ovl_is_impuredir(upperpath.dentry))
+			ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
 	}
 	for (i = 0; i < numlower; i++) {
 		oe->lowerstack[i].dentry = stack[i].dentry;
@@ -986,9 +1131,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 	root_dentry->d_fsdata = oe;
 
-	realinode = d_inode(ovl_dentry_real(root_dentry));
-	ovl_inode_init(d_inode(root_dentry), realinode, !!upperpath.dentry);
-	ovl_copyattr(realinode, d_inode(root_dentry));
+	ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
+		       ovl_dentry_lower(root_dentry));
 
 	sb->s_root = root_dentry;
 
@@ -998,6 +1142,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	kfree(oe);
 out_put_cred:
 	put_cred(ufs->creator_cred);
+out_put_indexdir:
+	dput(ufs->indexdir);
 out_put_lower_mnt:
 	for (i = 0; i < ufs->numlower; i++)
 		mntput(ufs->lower_mnt[i]);
@@ -1011,8 +1157,12 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	kfree(stack);
 out_free_lowertmp:
 	kfree(lowertmp);
+out_unlock_workdentry:
+	ovl_inuse_unlock(workpath.dentry);
 out_put_workpath:
 	path_put(&workpath);
+out_unlock_upperdentry:
+	ovl_inuse_unlock(upperpath.dentry);
 out_put_upperpath:
 	path_put(&upperpath);
 out_free_config:
@@ -1038,14 +1188,43 @@ static struct file_system_type ovl_fs_type = {
 };
 MODULE_ALIAS_FS("overlay");
 
+/*
+ * Constructor passed to kmem_cache_create() for ovl_inode objects: runs
+ * inode_init_once() on the embedded VFS inode.
+ */
+static void ovl_inode_init_once(void *foo)
+{
+	inode_init_once(&((struct ovl_inode *)foo)->vfs_inode);
+}
+
 static int __init ovl_init(void)
 {
-	return register_filesystem(&ovl_fs_type);
+	int err;
+
+	ovl_inode_cachep = kmem_cache_create("ovl_inode",
+					     sizeof(struct ovl_inode), 0,
+					     (SLAB_RECLAIM_ACCOUNT|
+					      SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+					     ovl_inode_init_once);
+	if (ovl_inode_cachep == NULL)
+		return -ENOMEM;
+
+	err = register_filesystem(&ovl_fs_type);
+	if (err)
+		kmem_cache_destroy(ovl_inode_cachep);
+
+	return err;
 }
 
 static void __exit ovl_exit(void)
 {
 	unregister_filesystem(&ovl_fs_type);
+
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
+	kmem_cache_destroy(ovl_inode_cachep);
+
 }
 
 module_init(ovl_init);

diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 8090489..c492ba7 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c

@@ -12,6 +12,10 @@
 #include <linux/slab.h>
 #include <linux/cred.h>
 #include <linux/xattr.h>
+#include <linux/exportfs.h>
+#include <linux/uuid.h>
+#include <linux/namei.h>
+#include <linux/ratelimit.h>
 #include "overlayfs.h"
 #include "ovl_entry.h"
 
@@ -47,6 +51,19 @@ struct super_block *ovl_same_sb(struct super_block *sb)
 	return ofs->same_sb;
 }
 
+/*
+ * True when @sb has export operations with an fh_to_dentry method and a
+ * non-null UUID.
+ */
+bool ovl_can_decode_fh(struct super_block *sb)
+{
+	if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry)
+		return false;
+
+	return !uuid_is_null(&sb->s_uuid);
+}
+
+/* Return the index directory dentry stored in the overlay's ovl_fs */
+struct dentry *ovl_indexdir(struct super_block *sb)
+{
+	return ((struct ovl_fs *)sb->s_fs_info)->indexdir;
+}
+
 struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
 {
 	size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
@@ -78,7 +95,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
 	struct ovl_entry *oe = dentry->d_fsdata;
 	enum ovl_path_type type = 0;
 
-	if (oe->__upperdentry) {
+	if (ovl_dentry_upper(dentry)) {
 		type = __OVL_PATH_UPPER;
 
 		/*
@@ -99,10 +116,9 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
 void ovl_path_upper(struct dentry *dentry, struct path *path)
 {
 	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
-	struct ovl_entry *oe = dentry->d_fsdata;
 
 	path->mnt = ofs->upper_mnt;
-	path->dentry = ovl_upperdentry_dereference(oe);
+	path->dentry = ovl_dentry_upper(dentry);
 }
 
 void ovl_path_lower(struct dentry *dentry, struct path *path)
@@ -126,47 +142,47 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
 
 struct dentry *ovl_dentry_upper(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
-
-	return ovl_upperdentry_dereference(oe);
-}
-
-static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe)
-{
-	return oe->numlower ? oe->lowerstack[0].dentry : NULL;
+	return ovl_upperdentry_dereference(OVL_I(d_inode(dentry)));
 }
 
 struct dentry *ovl_dentry_lower(struct dentry *dentry)
 {
 	struct ovl_entry *oe = dentry->d_fsdata;
 
-	return __ovl_dentry_lower(oe);
+	return oe->numlower ? oe->lowerstack[0].dentry : NULL;
 }
 
 struct dentry *ovl_dentry_real(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
-	struct dentry *realdentry;
-
-	realdentry = ovl_upperdentry_dereference(oe);
-	if (!realdentry)
-		realdentry = __ovl_dentry_lower(oe);
-
-	return realdentry;
+	return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
 }
 
+/* Return the inode of the upper dentry, or NULL when there is no upper dentry */
+struct inode *ovl_inode_upper(struct inode *inode)
+{
+	struct dentry *upper = ovl_upperdentry_dereference(OVL_I(inode));
+
+	if (!upper)
+		return NULL;
+
+	return d_inode(upper);
+}
+
+/* Return the lower inode recorded in the overlay inode (may be NULL) */
+struct inode *ovl_inode_lower(struct inode *inode)
+{
+	struct ovl_inode *oi = OVL_I(inode);
+
+	return oi->lower;
+}
+
+/* Return the upper inode when there is one, otherwise the lower inode */
+struct inode *ovl_inode_real(struct inode *inode)
+{
+	struct inode *upperinode = ovl_inode_upper(inode);
+
+	if (upperinode)
+		return upperinode;
+
+	return ovl_inode_lower(inode);
+}
+
+
 struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
-
-	return oe->cache;
+	return OVL_I(d_inode(dentry))->cache;
 }
 
 void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
-
-	oe->cache = cache;
+	OVL_I(d_inode(dentry))->cache = cache;
 }
 
 bool ovl_dentry_is_opaque(struct dentry *dentry)
@@ -175,13 +191,6 @@ bool ovl_dentry_is_opaque(struct dentry *dentry)
 	return oe->opaque;
 }
 
-bool ovl_dentry_is_impure(struct dentry *dentry)
-{
-	struct ovl_entry *oe = dentry->d_fsdata;
-
-	return oe->impure;
-}
-
 bool ovl_dentry_is_whiteout(struct dentry *dentry)
 {
 	return !dentry->d_inode && ovl_dentry_is_opaque(dentry);
@@ -194,6 +203,25 @@ void ovl_dentry_set_opaque(struct dentry *dentry)
 	oe->opaque = true;
 }
 
+/*
+ * With hard links, ovl_dentry_upper() can return a positive dentry even
+ * though this particular dentry has no upper alias of its own.  Copy up
+ * needs to know whether the upper alias exists, so it checks the
+ * per-dentry has_upper state instead of ovl_dentry_upper().
+ */
+bool ovl_dentry_has_upper_alias(struct dentry *dentry)
+{
+	return ((struct ovl_entry *)dentry->d_fsdata)->has_upper;
+}
+
+/* Record in the dentry's ovl_entry that it has an upper alias */
+void ovl_dentry_set_upper_alias(struct dentry *dentry)
+{
+	((struct ovl_entry *)dentry->d_fsdata)->has_upper = true;
+}
+
 bool ovl_redirect_dir(struct super_block *sb)
 {
 	struct ovl_fs *ofs = sb->s_fs_info;
@@ -203,63 +231,59 @@ bool ovl_redirect_dir(struct super_block *sb)
 
 const char *ovl_dentry_get_redirect(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
-
-	return oe->redirect;
+	return OVL_I(d_inode(dentry))->redirect;
 }
 
 void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
+	struct ovl_inode *oi = OVL_I(d_inode(dentry));
 
-	kfree(oe->redirect);
-	oe->redirect = redirect;
+	kfree(oi->redirect);
+	oi->redirect = redirect;
 }
 
-void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
+void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
+		    struct dentry *lowerdentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
+	if (upperdentry)
+		OVL_I(inode)->__upperdentry = upperdentry;
+	if (lowerdentry)
+		OVL_I(inode)->lower = d_inode(lowerdentry);
 
-	WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode));
-	WARN_ON(oe->__upperdentry);
+	ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode);
+}
+
+void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
+{
+	struct inode *upperinode = d_inode(upperdentry);
+
+	WARN_ON(OVL_I(inode)->__upperdentry);
+
 	/*
-	 * Make sure upperdentry is consistent before making it visible to
-	 * ovl_upperdentry_dereference().
+	 * Make sure upperdentry is consistent before making it visible
 	 */
 	smp_wmb();
-	oe->__upperdentry = upperdentry;
-}
-
-void ovl_inode_init(struct inode *inode, struct inode *realinode, bool is_upper)
-{
-	WRITE_ONCE(inode->i_private, (unsigned long) realinode |
-		   (is_upper ? OVL_ISUPPER_MASK : 0));
-}
-
-void ovl_inode_update(struct inode *inode, struct inode *upperinode)
-{
-	WARN_ON(!upperinode);
-	WARN_ON(!inode_unhashed(inode));
-	WRITE_ONCE(inode->i_private,
-		   (unsigned long) upperinode | OVL_ISUPPER_MASK);
-	if (!S_ISDIR(upperinode->i_mode))
+	OVL_I(inode)->__upperdentry = upperdentry;
+	if (!S_ISDIR(upperinode->i_mode) && inode_unhashed(inode)) {
+		inode->i_private = upperinode;
 		__insert_inode_hash(inode, (unsigned long) upperinode);
+	}
 }
 
 void ovl_dentry_version_inc(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
+	struct inode *inode = d_inode(dentry);
 
-	WARN_ON(!inode_is_locked(dentry->d_inode));
-	oe->version++;
+	WARN_ON(!inode_is_locked(inode));
+	OVL_I(inode)->version++;
 }
 
 u64 ovl_dentry_version_get(struct dentry *dentry)
 {
-	struct ovl_entry *oe = dentry->d_fsdata;
+	struct inode *inode = d_inode(dentry);
 
-	WARN_ON(!inode_is_locked(dentry->d_inode));
-	return oe->version;
+	WARN_ON(!inode_is_locked(inode));
+	return OVL_I(inode)->version;
 }
 
 bool ovl_is_whiteout(struct dentry *dentry)
@@ -276,32 +300,21 @@ struct file *ovl_path_open(struct path *path, int flags)
 
 int ovl_copy_up_start(struct dentry *dentry)
 {
-	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
-	struct ovl_entry *oe = dentry->d_fsdata;
+	struct ovl_inode *oi = OVL_I(d_inode(dentry));
 	int err;
 
-	spin_lock(&ofs->copyup_wq.lock);
-	err = wait_event_interruptible_locked(ofs->copyup_wq, !oe->copying);
-	if (!err) {
-		if (oe->__upperdentry)
-			err = 1; /* Already copied up */
-		else
-			oe->copying = true;
+	err = mutex_lock_interruptible(&oi->lock);
+	if (!err && ovl_dentry_has_upper_alias(dentry)) {
+		err = 1; /* Already copied up */
+		mutex_unlock(&oi->lock);
 	}
-	spin_unlock(&ofs->copyup_wq.lock);
 
 	return err;
 }
 
 void ovl_copy_up_end(struct dentry *dentry)
 {
-	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
-	struct ovl_entry *oe = dentry->d_fsdata;
-
-	spin_lock(&ofs->copyup_wq.lock);
-	oe->copying = false;
-	wake_up_locked(&ofs->copyup_wq);
-	spin_unlock(&ofs->copyup_wq.lock);
+	mutex_unlock(&OVL_I(d_inode(dentry))->lock);
 }
 
 bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
@@ -343,9 +356,8 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
 int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
 {
 	int err;
-	struct ovl_entry *oe = dentry->d_fsdata;
 
-	if (oe->impure)
+	if (ovl_test_flag(OVL_IMPURE, d_inode(dentry)))
 		return 0;
 
 	/*
@@ -355,7 +367,176 @@ int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
 	err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE,
 				 "y", 1, 0);
 	if (!err)
-		oe->impure = true;
+		ovl_set_flag(OVL_IMPURE, d_inode(dentry));
 
 	return err;
 }
+
+void ovl_set_flag(unsigned long flag, struct inode *inode)
+{
+	set_bit(flag, &OVL_I(inode)->flags);
+}
+
+bool ovl_test_flag(unsigned long flag, struct inode *inode)
+{
+	return test_bit(flag, &OVL_I(inode)->flags);
+}
+
+/**
+ * Caller must hold a reference to inode to prevent it from being freed while
+ * it is marked inuse.
+ */
+bool ovl_inuse_trylock(struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+	bool locked = false;
+
+	spin_lock(&inode->i_lock);
+	if (!(inode->i_state & I_OVL_INUSE)) {
+		inode->i_state |= I_OVL_INUSE;
+		locked = true;
+	}
+	spin_unlock(&inode->i_lock);
+
+	return locked;
+}
+
+void ovl_inuse_unlock(struct dentry *dentry)
+{
+	if (dentry) {
+		struct inode *inode = d_inode(dentry);
+
+		spin_lock(&inode->i_lock);
+		WARN_ON(!(inode->i_state & I_OVL_INUSE));
+		inode->i_state &= ~I_OVL_INUSE;
+		spin_unlock(&inode->i_lock);
+	}
+}
+
+/* Caller must hold OVL_I(inode)->lock */
+static void ovl_cleanup_index(struct dentry *dentry)
+{
+	struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode;
+	struct dentry *lowerdentry = ovl_dentry_lower(dentry);
+	struct dentry *upperdentry = ovl_dentry_upper(dentry);
+	struct dentry *index = NULL;
+	struct inode *inode;
+	struct qstr name;
+	int err;
+
+	err = ovl_get_index_name(lowerdentry, &name);
+	if (err)
+		goto fail;
+
+	inode = d_inode(upperdentry);
+	if (inode->i_nlink != 1) {
+		pr_warn_ratelimited("overlayfs: cleanup linked index (%pd2, ino=%lu, nlink=%u)\n",
+				    upperdentry, inode->i_ino, inode->i_nlink);
+		/*
+		 * We either have a bug with persistent union nlink or a lower
+		 * hardlink was added while overlay is mounted. Adding a lower
+		 * hardlink and then unlinking all overlay hardlinks would drop
+		 * overlay nlink to zero before all upper inodes are unlinked.
+		 * As a safety measure, when that situation is detected, set
+		 * the overlay nlink to the index inode nlink minus one for the
+		 * index entry itself.
+		 */
+		set_nlink(d_inode(dentry), inode->i_nlink - 1);
+		ovl_set_nlink_upper(dentry);
+		goto out;
+	}
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	/* TODO: whiteout instead of cleanup to block future open by handle */
+	index = lookup_one_len(name.name, ovl_indexdir(dentry->d_sb), name.len);
+	err = PTR_ERR(index);
+	if (!IS_ERR(index))
+		err = ovl_cleanup(dir, index);
+	inode_unlock(dir);
+	if (err)
+		goto fail;
+
+out:
+	dput(index);
+	return;
+
+fail:
+	pr_err("overlayfs: cleanup index of '%pd2' failed (%i)\n", dentry, err);
+	goto out;
+}
+
+/*
+ * Operations that change overlay inode and upper inode nlink need to be
+ * synchronized with copy up for persistent nlink accounting.
+ */
+int ovl_nlink_start(struct dentry *dentry, bool *locked)
+{
+	struct ovl_inode *oi = OVL_I(d_inode(dentry));
+	const struct cred *old_cred;
+	int err;
+
+	if (!d_inode(dentry) || d_is_dir(dentry))
+		return 0;
+
+	/*
+	 * When inodes index is enabled, we store the union overlay nlink
+	 * in an xattr on the index inode. When whiting out lower hardlinks
+	 * we need to decrement the overlay persistent nlink, but before the
+	 * first copy up, we have no upper index inode to store the xattr.
+	 *
+	 * As a workaround, before whiteout/rename over of a lower hardlink,
+	 * copy up to create the upper index. Creating the upper index will
+	 * initialize the overlay nlink, so it could be dropped if unlink
+	 * or rename succeeds.
+	 *
+	 * TODO: implement metadata only index copy up when called with
+	 *       ovl_copy_up_flags(dentry, O_PATH).
+	 */
+	if (ovl_indexdir(dentry->d_sb) && !ovl_dentry_has_upper_alias(dentry) &&
+	    d_inode(ovl_dentry_lower(dentry))->i_nlink > 1) {
+		err = ovl_copy_up(dentry);
+		if (err)
+			return err;
+	}
+
+	err = mutex_lock_interruptible(&oi->lock);
+	if (err)
+		return err;
+
+	if (!ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+		goto out;
+
+	old_cred = ovl_override_creds(dentry->d_sb);
+	/*
+	 * The overlay inode nlink should be incremented/decremented IFF the
+	 * upper operation succeeds, along with nlink change of upper inode.
+	 * Therefore, before link/unlink/rename, we store the union nlink
+	 * value relative to the upper inode nlink in an upper inode xattr.
+	 */
+	err = ovl_set_nlink_upper(dentry);
+	revert_creds(old_cred);
+
+out:
+	if (err)
+		mutex_unlock(&oi->lock);
+	else
+		*locked = true;
+
+	return err;
+}
+
+void ovl_nlink_end(struct dentry *dentry, bool locked)
+{
+	if (locked) {
+		if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) &&
+		    d_inode(dentry)->i_nlink == 0) {
+			const struct cred *old_cred;
+
+			old_cred = ovl_override_creds(dentry->d_sb);
+			ovl_cleanup_index(dentry);
+			revert_creds(old_cred);
+		}
+
+		mutex_unlock(&OVL_I(d_inode(dentry))->lock);
+	}
+}

diff --git a/fs/proc/base.c b/fs/proc/base.c
index f1e1927..88b773f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c

@@ -1355,6 +1355,53 @@ static const struct file_operations proc_fault_inject_operations = {
 	.write		= proc_fault_inject_write,
 	.llseek		= generic_file_llseek,
 };
+
+static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
+				   size_t count, loff_t *ppos)
+{
+	struct task_struct *task;
+	int err, n;
+
+	task = get_proc_task(file_inode(file));
+	if (!task)
+		return -ESRCH;
+	put_task_struct(task);
+	if (task != current)
+		return -EPERM;
+	err = kstrtoint_from_user(buf, count, 10, &n);
+	if (err)
+		return err;
+	if (n < 0 || n == INT_MAX)
+		return -EINVAL;
+	current->fail_nth = n + 1;
+	return count;
+}
+
+static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
+				  size_t count, loff_t *ppos)
+{
+	struct task_struct *task;
+	int err;
+
+	task = get_proc_task(file_inode(file));
+	if (!task)
+		return -ESRCH;
+	put_task_struct(task);
+	if (task != current)
+		return -EPERM;
+	if (count < 1)
+		return -EINVAL;
+	err = put_user((char)(current->fail_nth ? 'N' : 'Y'), buf);
+	if (err)
+		return err;
+	current->fail_nth = 0;
+	return 1;
+}
+
+static const struct file_operations proc_fail_nth_operations = {
+	.read		= proc_fail_nth_read,
+	.write		= proc_fail_nth_write,
+};
 #endif
 
 
@@ -3311,6 +3358,11 @@ static const struct pid_entry tid_base_stuff[] = {
 #endif
 #ifdef CONFIG_FAULT_INJECTION
 	REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
+	/*
+	 * Operations on the file check that the task is current,
+	 * so we create it with 0666 to support testing under unprivileged user.
+	 */
+	REG("fail-nth", 0666, proc_fail_nth_operations),
 #endif
 #ifdef CONFIG_TASK_IO_ACCOUNTING
 	ONE("io",	S_IRUSR, proc_tid_io_accounting),

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 9425c0d..e3cda0b 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c

@@ -180,7 +180,6 @@ static int xlate_proc_name(const char *name, struct proc_dir_entry **ret,
 }
 
 static DEFINE_IDA(proc_inum_ida);
-static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
 
 #define PROC_DYNAMIC_FIRST 0xF0000000U
 
@@ -190,37 +189,20 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
  */
 int proc_alloc_inum(unsigned int *inum)
 {
-	unsigned int i;
-	int error;
+	int i;
 
-retry:
-	if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL))
-		return -ENOMEM;
+	i = ida_simple_get(&proc_inum_ida, 0, UINT_MAX - PROC_DYNAMIC_FIRST + 1,
+			   GFP_KERNEL);
+	if (i < 0)
+		return i;
 
-	spin_lock_irq(&proc_inum_lock);
-	error = ida_get_new(&proc_inum_ida, &i);
-	spin_unlock_irq(&proc_inum_lock);
-	if (error == -EAGAIN)
-		goto retry;
-	else if (error)
-		return error;
-
-	if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
-		spin_lock_irq(&proc_inum_lock);
-		ida_remove(&proc_inum_ida, i);
-		spin_unlock_irq(&proc_inum_lock);
-		return -ENOSPC;
-	}
-	*inum = PROC_DYNAMIC_FIRST + i;
+	*inum = PROC_DYNAMIC_FIRST + (unsigned int)i;
 	return 0;
 }
 
 void proc_free_inum(unsigned int inum)
 {
-	unsigned long flags;
-	spin_lock_irqsave(&proc_inum_lock, flags);
-	ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
-	spin_unlock_irqrestore(&proc_inum_lock, flags);
+	ida_simple_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
 }
 
 /*

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c5ae09b..1869459 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h

@@ -67,7 +67,7 @@ struct proc_inode {
 	struct proc_dir_entry *pde;
 	struct ctl_table_header *sysctl;
 	struct ctl_table *sysctl_entry;
-	struct list_head sysctl_inodes;
+	struct hlist_node sysctl_inodes;
 	const struct proc_ns_operations *ns_ops;
 	struct inode vfs_inode;
 };

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 67985a7..8f47922 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c

@@ -191,7 +191,7 @@ static void init_header(struct ctl_table_header *head,
 	head->set = set;
 	head->parent = NULL;
 	head->node = node;
-	INIT_LIST_HEAD(&head->inodes);
+	INIT_HLIST_HEAD(&head->inodes);
 	if (node) {
 		struct ctl_table *entry;
 		for (entry = table; entry->procname; entry++, node++)
@@ -261,25 +261,42 @@ static void unuse_table(struct ctl_table_header *p)
 			complete(p->unregistering);
 }
 
-/* called under sysctl_lock */
 static void proc_sys_prune_dcache(struct ctl_table_header *head)
 {
-	struct inode *inode, *prev = NULL;
+	struct inode *inode;
 	struct proc_inode *ei;
+	struct hlist_node *node;
+	struct super_block *sb;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) {
-		inode = igrab(&ei->vfs_inode);
-		if (inode) {
-			rcu_read_unlock();
-			iput(prev);
-			prev = inode;
-			d_prune_aliases(inode);
+	for (;;) {
+		node = hlist_first_rcu(&head->inodes);
+		if (!node)
+			break;
+		ei = hlist_entry(node, struct proc_inode, sysctl_inodes);
+		spin_lock(&sysctl_lock);
+		hlist_del_init_rcu(&ei->sysctl_inodes);
+		spin_unlock(&sysctl_lock);
+
+		inode = &ei->vfs_inode;
+		sb = inode->i_sb;
+		if (!atomic_inc_not_zero(&sb->s_active))
+			continue;
+		inode = igrab(inode);
+		rcu_read_unlock();
+		if (unlikely(!inode)) {
+			deactivate_super(sb);
 			rcu_read_lock();
+			continue;
 		}
+
+		d_prune_aliases(inode);
+		iput(inode);
+		deactivate_super(sb);
+
+		rcu_read_lock();
 	}
 	rcu_read_unlock();
-	iput(prev);
 }
 
 /* called under sysctl_lock, will reacquire if has to wait */
@@ -461,7 +478,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
 	}
 	ei->sysctl = head;
 	ei->sysctl_entry = table;
-	list_add_rcu(&ei->sysctl_inodes, &head->inodes);
+	hlist_add_head_rcu(&ei->sysctl_inodes, &head->inodes);
 	head->count++;
 	spin_unlock(&sysctl_lock);
 
@@ -489,7 +506,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
 void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
 {
 	spin_lock(&sysctl_lock);
-	list_del_rcu(&PROC_I(inode)->sysctl_inodes);
+	hlist_del_init_rcu(&PROC_I(inode)->sysctl_inodes);
 	if (!--head->count)
 		kfree_rcu(head, rcu);
 	spin_unlock(&sysctl_lock);
@@ -1061,16 +1078,30 @@ static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
 	return -EINVAL;
 }
 
+/* Entries using the douintvec handlers must be exactly one unsigned int. */
+static int sysctl_check_table_array(const char *path, struct ctl_table *table)
+{
+	int err = 0;
+
+	if (((table->proc_handler == proc_douintvec) ||
+	     (table->proc_handler == proc_douintvec_minmax)) &&
+	    (table->maxlen != sizeof(unsigned int)))
+		err |= sysctl_err(path, table, "array not allowed");
+
+	return err;
+}
+
 static int sysctl_check_table(const char *path, struct ctl_table *table)
 {
 	int err = 0;
 	for (; table->procname; table++) {
 		if (table->child)
-			err = sysctl_err(path, table, "Not a file");
+			err |= sysctl_err(path, table, "Not a file");
 
 		if ((table->proc_handler == proc_dostring) ||
 		    (table->proc_handler == proc_dointvec) ||
 		    (table->proc_handler == proc_douintvec) ||
+		    (table->proc_handler == proc_douintvec_minmax) ||
 		    (table->proc_handler == proc_dointvec_minmax) ||
 		    (table->proc_handler == proc_dointvec_jiffies) ||
 		    (table->proc_handler == proc_dointvec_userhz_jiffies) ||
@@ -1078,15 +1109,17 @@ static int sysctl_check_table(const char *path, struct ctl_table *table)
 		    (table->proc_handler == proc_doulongvec_minmax) ||
 		    (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
 			if (!table->data)
-				err = sysctl_err(path, table, "No data");
+				err |= sysctl_err(path, table, "No data");
 			if (!table->maxlen)
-				err = sysctl_err(path, table, "No maxlen");
+				err |= sysctl_err(path, table, "No maxlen");
+			else
+				err |= sysctl_check_table_array(path, table);
 		}
 		if (!table->proc_handler)
-			err = sysctl_err(path, table, "No proc_handler");
+			err |= sysctl_err(path, table, "No proc_handler");
 
 		if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
-			err = sysctl_err(path, table, "bogus .mode 0%o",
+			err |= sysctl_err(path, table, "bogus .mode 0%o",
 				table->mode);
 	}
 	return err;

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 520802d..b836fd6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c

@@ -298,7 +298,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 		pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
 	}
 
-	/* We don't show the stack guard page in /proc/maps */
 	start = vma->vm_start;
 	end = vma->vm_end;
 

diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index dc198bc..edc8ef7 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c

@@ -513,9 +513,17 @@ static void __discard_prealloc(struct reiserfs_transaction_handle *th,
 			       "inode has negative prealloc blocks count.");
 #endif
 	while (ei->i_prealloc_count > 0) {
-		reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
-		ei->i_prealloc_block++;
+		b_blocknr_t block_to_free;
+
+		/*
+		 * reiserfs_free_prealloc_block can drop the write lock,
+		 * which could allow another caller to free the same block.
+		 * We can protect against it by modifying the prealloc
+		 * state before calling it.
+		 */
+		block_to_free = ei->i_prealloc_block++;
 		ei->i_prealloc_count--;
+		reiserfs_free_prealloc_block(th, inode, block_to_free);
 		dirty = 1;
 	}
 	if (dirty)
@@ -1128,7 +1136,7 @@ static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
 	hint->prealloc_size = 0;
 
 	if (!hint->formatted_node && hint->preallocate) {
-		if (S_ISREG(hint->inode->i_mode)
+		if (S_ISREG(hint->inode->i_mode) && !IS_PRIVATE(hint->inode)
 		    && hint->inode->i_size >=
 		    REISERFS_SB(hint->th->t_super)->s_alloc_options.
 		    preallocmin * hint->inode->i_sb->s_blocksize)

diff --git a/fs/udf/file.c b/fs/udf/file.c
index f5eb2d5..356c2bf 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c

@@ -43,10 +43,15 @@ static void __udf_adinicb_readpage(struct page *page)
 	struct inode *inode = page->mapping->host;
 	char *kaddr;
 	struct udf_inode_info *iinfo = UDF_I(inode);
+	loff_t isize = i_size_read(inode);
 
+	/*
+	 * We have to be careful here as truncate can change i_size under us.
+	 * So just sample it once and use the same value everywhere.
+	 */
 	kaddr = kmap_atomic(page);
-	memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size);
-	memset(kaddr + inode->i_size, 0, PAGE_SIZE - inode->i_size);
+	memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, isize);
+	memset(kaddr + isize, 0, PAGE_SIZE - isize);
 	flush_dcache_page(page);
 	SetPageUptodate(page);
 	kunmap_atomic(kaddr);
@@ -71,7 +76,8 @@ static int udf_adinicb_writepage(struct page *page,
 	BUG_ON(!PageLocked(page));
 
 	kaddr = kmap_atomic(page);
-	memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, inode->i_size);
+	memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr,
+		i_size_read(inode));
 	SetPageUptodate(page);
 	kunmap_atomic(kaddr);
 	mark_inode_dirty(inode);

diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 98c510e..18fdb9d 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c

@@ -1222,8 +1222,8 @@ int udf_setsize(struct inode *inode, loff_t newsize)
 			return err;
 		}
 set_size:
-		truncate_setsize(inode, newsize);
 		up_write(&iinfo->i_data_sem);
+		truncate_setsize(inode, newsize);
 	} else {
 		if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
 			down_write(&iinfo->i_data_sem);
@@ -1240,9 +1240,9 @@ int udf_setsize(struct inode *inode, loff_t newsize)
 					  udf_get_block);
 		if (err)
 			return err;
+		truncate_setsize(inode, newsize);
 		down_write(&iinfo->i_data_sem);
 		udf_clear_extent_cache(inode);
-		truncate_setsize(inode, newsize);
 		udf_truncate_extents(inode);
 		up_write(&iinfo->i_data_sem);
 	}

diff --git a/fs/udf/super.c b/fs/udf/super.c
index 14b4bc1..462ac2e 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c

@@ -73,8 +73,6 @@
 #define VDS_POS_TERMINATING_DESC	6
 #define VDS_POS_LENGTH			7
 
-#define UDF_DEFAULT_BLOCKSIZE 2048
-
 #define VSD_FIRST_SECTOR_OFFSET		32768
 #define VSD_MAX_SECTOR_OFFSET		0x800000
 

diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c
index 77c331f..14626b3 100644
--- a/fs/udf/udftime.c
+++ b/fs/udf/udftime.c

@@ -38,56 +38,11 @@
 
 #include <linux/types.h>
 #include <linux/kernel.h>
-
-#define EPOCH_YEAR 1970
-
-#ifndef __isleap
-/* Nonzero if YEAR is a leap year (every 4 years,
-   except every 100th isn't, and every 400th is).  */
-#define	__isleap(year)	\
-  ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
-#endif
-
-/* How many days come before each month (0-12).  */
-static const unsigned short int __mon_yday[2][13] = {
-	/* Normal years.  */
-	{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
-	/* Leap years.  */
-	{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
-};
-
-#define MAX_YEAR_SECONDS	69
-#define SPD			0x15180	/*3600*24 */
-#define SPY(y, l, s)		(SPD * (365 * y + l) + s)
-
-static time_t year_seconds[MAX_YEAR_SECONDS] = {
-/*1970*/ SPY(0,   0, 0), SPY(1,   0, 0), SPY(2,   0, 0), SPY(3,   1, 0),
-/*1974*/ SPY(4,   1, 0), SPY(5,   1, 0), SPY(6,   1, 0), SPY(7,   2, 0),
-/*1978*/ SPY(8,   2, 0), SPY(9,   2, 0), SPY(10,  2, 0), SPY(11,  3, 0),
-/*1982*/ SPY(12,  3, 0), SPY(13,  3, 0), SPY(14,  3, 0), SPY(15,  4, 0),
-/*1986*/ SPY(16,  4, 0), SPY(17,  4, 0), SPY(18,  4, 0), SPY(19,  5, 0),
-/*1990*/ SPY(20,  5, 0), SPY(21,  5, 0), SPY(22,  5, 0), SPY(23,  6, 0),
-/*1994*/ SPY(24,  6, 0), SPY(25,  6, 0), SPY(26,  6, 0), SPY(27,  7, 0),
-/*1998*/ SPY(28,  7, 0), SPY(29,  7, 0), SPY(30,  7, 0), SPY(31,  8, 0),
-/*2002*/ SPY(32,  8, 0), SPY(33,  8, 0), SPY(34,  8, 0), SPY(35,  9, 0),
-/*2006*/ SPY(36,  9, 0), SPY(37,  9, 0), SPY(38,  9, 0), SPY(39, 10, 0),
-/*2010*/ SPY(40, 10, 0), SPY(41, 10, 0), SPY(42, 10, 0), SPY(43, 11, 0),
-/*2014*/ SPY(44, 11, 0), SPY(45, 11, 0), SPY(46, 11, 0), SPY(47, 12, 0),
-/*2018*/ SPY(48, 12, 0), SPY(49, 12, 0), SPY(50, 12, 0), SPY(51, 13, 0),
-/*2022*/ SPY(52, 13, 0), SPY(53, 13, 0), SPY(54, 13, 0), SPY(55, 14, 0),
-/*2026*/ SPY(56, 14, 0), SPY(57, 14, 0), SPY(58, 14, 0), SPY(59, 15, 0),
-/*2030*/ SPY(60, 15, 0), SPY(61, 15, 0), SPY(62, 15, 0), SPY(63, 16, 0),
-/*2034*/ SPY(64, 16, 0), SPY(65, 16, 0), SPY(66, 16, 0), SPY(67, 17, 0),
-/*2038*/ SPY(68, 17, 0)
-};
-
-#define SECS_PER_HOUR	(60 * 60)
-#define SECS_PER_DAY	(SECS_PER_HOUR * 24)
+#include <linux/time.h>
 
 struct timespec *
 udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src)
 {
-	int yday;
 	u16 typeAndTimezone = le16_to_cpu(src.typeAndTimezone);
 	u16 year = le16_to_cpu(src.year);
 	uint8_t type = typeAndTimezone >> 12;
@@ -102,15 +57,9 @@ udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src)
 	} else
 		offset = 0;
 
-	if ((year < EPOCH_YEAR) ||
-	    (year >= EPOCH_YEAR + MAX_YEAR_SECONDS)) {
-		return NULL;
-	}
-	dest->tv_sec = year_seconds[year - EPOCH_YEAR];
+	dest->tv_sec = mktime64(year, src.month, src.day, src.hour, src.minute,
+			src.second);
 	dest->tv_sec -= offset * 60;
-
-	yday = ((__mon_yday[__isleap(year)][src.month - 1]) + src.day - 1);
-	dest->tv_sec += (((yday * 24) + src.hour) * 60 + src.minute) * 60 + src.second;
 	dest->tv_nsec = 1000 * (src.centiseconds * 10000 +
 			src.hundredsOfMicroseconds * 100 + src.microseconds);
 	return dest;
@@ -119,9 +68,9 @@ udf_disk_stamp_to_time(struct timespec *dest, struct timestamp src)
 struct timestamp *
 udf_time_to_disk_stamp(struct timestamp *dest, struct timespec ts)
 {
-	long int days, rem, y;
-	const unsigned short int *ip;
+	long seconds;
 	int16_t offset;
+	struct tm tm;
 
 	offset = -sys_tz.tz_minuteswest;
 
@@ -130,35 +79,14 @@ udf_time_to_disk_stamp(struct timestamp *dest, struct timespec ts)
 
 	dest->typeAndTimezone = cpu_to_le16(0x1000 | (offset & 0x0FFF));
 
-	ts.tv_sec += offset * 60;
-	days = ts.tv_sec / SECS_PER_DAY;
-	rem = ts.tv_sec % SECS_PER_DAY;
-	dest->hour = rem / SECS_PER_HOUR;
-	rem %= SECS_PER_HOUR;
-	dest->minute = rem / 60;
-	dest->second = rem % 60;
-	y = 1970;
-
-#define DIV(a, b) ((a) / (b) - ((a) % (b) < 0))
-#define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400))
-
-	while (days < 0 || days >= (__isleap(y) ? 366 : 365)) {
-		long int yg = y + days / 365 - (days % 365 < 0);
-
-		/* Adjust DAYS and Y to match the guessed year.  */
-		days -= ((yg - y) * 365
-			 + LEAPS_THRU_END_OF(yg - 1)
-			 - LEAPS_THRU_END_OF(y - 1));
-		y = yg;
-	}
-	dest->year = cpu_to_le16(y);
-	ip = __mon_yday[__isleap(y)];
-	for (y = 11; days < (long int)ip[y]; --y)
-		continue;
-	days -= ip[y];
-	dest->month = y + 1;
-	dest->day = days + 1;
-
+	seconds = ts.tv_sec + offset * 60;
+	time64_to_tm(seconds, 0, &tm);
+	dest->year = cpu_to_le16(tm.tm_year + 1900);
+	dest->month = tm.tm_mon + 1;
+	dest->day = tm.tm_mday;
+	dest->hour = tm.tm_hour;
+	dest->minute = tm.tm_min;
+	dest->second = tm.tm_sec;
 	dest->centiseconds = ts.tv_nsec / 10000000;
 	dest->hundredsOfMicroseconds = (ts.tv_nsec / 1000 -
 					dest->centiseconds * 10000) / 100;

diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 35faf12..1b98cfa 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig

@@ -96,3 +96,16 @@
 	  not useful unless you are debugging a particular problem.
 
 	  Say N unless you are an XFS developer, or you play one on TV.
+
+config XFS_ASSERT_FATAL
+	bool "XFS fatal asserts"
+	default y
+	depends on XFS_FS && XFS_DEBUG
+	help
+	  Set the default DEBUG mode ASSERT failure behavior.
+
+	  Say Y here to cause DEBUG mode ASSERT failures to result in fatal
+	  errors that BUG() the kernel by default. If you say N, ASSERT failures
+	  result in warnings.
+
+	  This behavior can be modified at runtime via sysfs.

diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index d6ea520..4d85992 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h

@@ -54,6 +54,16 @@ kmem_flags_convert(xfs_km_flags_t flags)
 			lflags &= ~__GFP_FS;
 	}
 
+	/*
+	 * Default page/slab allocator behavior is to retry forever
+	 * for small allocations. We can override this behavior by using
+	 * __GFP_RETRY_MAYFAIL which will tell the allocator to retry as long
+	 * as it is feasible but rather fail than retry forever for all
+	 * request sizes.
+	 */
+	if (flags & KM_MAYFAIL)
+		lflags |= __GFP_RETRY_MAYFAIL;
+
 	if (flags & KM_ZERO)
 		lflags |= __GFP_ZERO;
 

diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index 33db69b..b008ff3 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c

@@ -111,8 +111,7 @@ xfs_ag_resv_critical(
 
 	/* Critically low if less than 10% or max btree height remains. */
 	return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS,
-			pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL,
-			XFS_RANDOM_AG_RESV_CRITICAL);
+			pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL);
 }
 
 /*

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 7486401..744dcae 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c

@@ -606,7 +606,7 @@ const struct xfs_buf_ops xfs_agfl_buf_ops = {
 /*
  * Read in the allocation group free block array.
  */
-STATIC int				/* error */
+int					/* error */
 xfs_alloc_read_agfl(
 	xfs_mount_t	*mp,		/* mount point structure */
 	xfs_trans_t	*tp,		/* transaction pointer */
@@ -2454,8 +2454,7 @@ xfs_agf_read_verify(
 	    !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
 		xfs_buf_ioerror(bp, -EFSBADCRC);
 	else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
-				XFS_ERRTAG_ALLOC_READ_AGF,
-				XFS_RANDOM_ALLOC_READ_AGF))
+				XFS_ERRTAG_ALLOC_READ_AGF))
 		xfs_buf_ioerror(bp, -EFSCORRUPTED);
 
 	if (bp->b_error)
@@ -2842,8 +2841,7 @@ xfs_free_extent(
 	ASSERT(type != XFS_AG_RESV_AGFL);
 
 	if (XFS_TEST_ERROR(false, mp,
-			XFS_ERRTAG_FREE_EXTENT,
-			XFS_RANDOM_FREE_EXTENT))
+			XFS_ERRTAG_FREE_EXTENT))
 		return -EIO;
 
 	error = xfs_free_extent_fix_freelist(tp, agno, &agbp);

diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 77d9c27..ef26edc 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h

@@ -213,6 +213,8 @@ xfs_alloc_get_rec(
 
 int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
 			xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
+int xfs_alloc_read_agfl(struct xfs_mount *mp, struct xfs_trans *tp,
+			xfs_agnumber_t agno, struct xfs_buf **bpp);
 int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
 int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno,
 		struct xfs_buf **agbp);

diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index e1fcfe7..cfde0a0 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c

@@ -253,7 +253,7 @@ xfs_allocbt_init_ptr_from_cur(
 	ptr->s = agf->agf_roots[cur->bc_btnum];
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_bnobt_key_diff(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*key)
@@ -261,42 +261,42 @@ xfs_bnobt_key_diff(
 	xfs_alloc_rec_incore_t	*rec = &cur->bc_rec.a;
 	xfs_alloc_key_t		*kp = &key->alloc;
 
-	return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
+	return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_cntbt_key_diff(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*key)
 {
 	xfs_alloc_rec_incore_t	*rec = &cur->bc_rec.a;
 	xfs_alloc_key_t		*kp = &key->alloc;
-	__int64_t		diff;
+	int64_t			diff;
 
-	diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
+	diff = (int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
 	if (diff)
 		return diff;
 
-	return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
+	return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_bnobt_diff_two_keys(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*k1,
 	union xfs_btree_key	*k2)
 {
-	return (__int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
+	return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
 			  be32_to_cpu(k2->alloc.ar_startblock);
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_cntbt_diff_two_keys(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*k1,
 	union xfs_btree_key	*k2)
 {
-	__int64_t		diff;
+	int64_t			diff;
 
 	diff =  be32_to_cpu(k1->alloc.ar_blockcount) -
 		be32_to_cpu(k2->alloc.ar_blockcount);
@@ -395,7 +395,6 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = {
 };
 
 
-#if defined(DEBUG) || defined(XFS_WARN)
 STATIC int
 xfs_bnobt_keys_inorder(
 	struct xfs_btree_cur	*cur,
@@ -442,7 +441,6 @@ xfs_cntbt_recs_inorder(
 		 be32_to_cpu(r1->alloc.ar_startblock) <
 		 be32_to_cpu(r2->alloc.ar_startblock));
 }
-#endif /* DEBUG */
 
 static const struct xfs_btree_ops xfs_bnobt_ops = {
 	.rec_len		= sizeof(xfs_alloc_rec_t),
@@ -462,10 +460,8 @@ static const struct xfs_btree_ops xfs_bnobt_ops = {
 	.key_diff		= xfs_bnobt_key_diff,
 	.buf_ops		= &xfs_allocbt_buf_ops,
 	.diff_two_keys		= xfs_bnobt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
 	.keys_inorder		= xfs_bnobt_keys_inorder,
 	.recs_inorder		= xfs_bnobt_recs_inorder,
-#endif
 };
 
 static const struct xfs_btree_ops xfs_cntbt_ops = {
@@ -486,10 +482,8 @@ static const struct xfs_btree_ops xfs_cntbt_ops = {
 	.key_diff		= xfs_cntbt_key_diff,
 	.buf_ops		= &xfs_allocbt_buf_ops,
 	.diff_two_keys		= xfs_cntbt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
 	.keys_inorder		= xfs_cntbt_keys_inorder,
 	.recs_inorder		= xfs_cntbt_recs_inorder,
-#endif
 };
 
 /*

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 6622d46..ef8a1c7 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c

@@ -114,6 +114,23 @@ xfs_inode_hasattr(
  * Overall external interface routines.
  *========================================================================*/
 
+/* Retrieve an extended attribute and its value.  Must have ilock. */
+int
+xfs_attr_get_ilocked(
+	struct xfs_inode	*ip,
+	struct xfs_da_args	*args)
+{
+	if (!xfs_inode_hasattr(ip))
+		return -ENOATTR;
+	else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
+		return xfs_attr_shortform_getvalue(args);
+	else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
+		return xfs_attr_leaf_get(args);
+	else
+		return xfs_attr_node_get(args);
+}
+
+/* Retrieve an extended attribute by name, and its value. */
 int
 xfs_attr_get(
 	struct xfs_inode	*ip,
@@ -141,14 +158,7 @@ xfs_attr_get(
 	args.op_flags = XFS_DA_OP_OKNOENT;
 
 	lock_mode = xfs_ilock_attr_map_shared(ip);
-	if (!xfs_inode_hasattr(ip))
-		error = -ENOATTR;
-	else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
-		error = xfs_attr_shortform_getvalue(&args);
-	else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
-		error = xfs_attr_leaf_get(&args);
-	else
-		error = xfs_attr_node_get(&args);
+	error = xfs_attr_get_ilocked(ip, &args);
 	xfs_iunlock(ip, lock_mode);
 
 	*valuelenp = args.valuelen;

diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 2852521..c6c15e5 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c

@@ -351,7 +351,7 @@ xfs_attr3_leaf_read(
 
 	err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
 				XFS_ATTR_FORK, &xfs_attr3_leaf_buf_ops);
-	if (!err && tp)
+	if (!err && tp && *bpp)
 		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF);
 	return err;
 }

diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index d52f525..5236d8e 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c

@@ -253,7 +253,7 @@ xfs_attr_rmtval_copyout(
 	xfs_ino_t	ino,
 	int		*offset,
 	int		*valuelen,
-	__uint8_t	**dst)
+	uint8_t		**dst)
 {
 	char		*src = bp->b_addr;
 	xfs_daddr_t	bno = bp->b_bn;
@@ -301,7 +301,7 @@ xfs_attr_rmtval_copyin(
 	xfs_ino_t	ino,
 	int		*offset,
 	int		*valuelen,
-	__uint8_t	**src)
+	uint8_t		**src)
 {
 	char		*dst = bp->b_addr;
 	xfs_daddr_t	bno = bp->b_bn;
@@ -355,7 +355,7 @@ xfs_attr_rmtval_get(
 	struct xfs_mount	*mp = args->dp->i_mount;
 	struct xfs_buf		*bp;
 	xfs_dablk_t		lblkno = args->rmtblkno;
-	__uint8_t		*dst = args->value;
+	uint8_t			*dst = args->value;
 	int			valuelen;
 	int			nmap;
 	int			error;
@@ -386,7 +386,8 @@ xfs_attr_rmtval_get(
 			       (map[i].br_startblock != HOLESTARTBLOCK));
 			dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
 			dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
-			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+			error = xfs_trans_read_buf(mp, args->trans,
+						   mp->m_ddev_targp,
 						   dblkno, dblkcnt, 0, &bp,
 						   &xfs_attr3_rmt_buf_ops);
 			if (error)
@@ -395,7 +396,7 @@ xfs_attr_rmtval_get(
 			error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
 							&offset, &valuelen,
 							&dst);
-			xfs_buf_relse(bp);
+			xfs_trans_brelse(args->trans, bp);
 			if (error)
 				return error;
 
@@ -421,7 +422,7 @@ xfs_attr_rmtval_set(
 	struct xfs_bmbt_irec	map;
 	xfs_dablk_t		lblkno;
 	xfs_fileoff_t		lfileoff = 0;
-	__uint8_t		*src = args->value;
+	uint8_t			*src = args->value;
 	int			blkcnt;
 	int			valuelen;
 	int			nmap;

diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h
index 90928bb..afd684a 100644
--- a/fs/xfs/libxfs/xfs_attr_sf.h
+++ b/fs/xfs/libxfs/xfs_attr_sf.h

@@ -31,10 +31,10 @@ typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t;
  * We generate this then sort it, attr_list() must return things in hash-order.
  */
 typedef struct xfs_attr_sf_sort {
-	__uint8_t	entno;		/* entry number in original list */
-	__uint8_t	namelen;	/* length of name value (no null) */
-	__uint8_t	valuelen;	/* length of value */
-	__uint8_t	flags;		/* flags bits (see xfs_attr_leaf.h) */
+	uint8_t		entno;		/* entry number in original list */
+	uint8_t		namelen;	/* length of name value (no null) */
+	uint8_t		valuelen;	/* length of value */
+	uint8_t		flags;		/* flags bits (see xfs_attr_leaf.h) */
 	xfs_dahash_t	hash;		/* this entry's hash value */
 	unsigned char	*name;		/* name value, pointer into buffer */
 } xfs_attr_sf_sort_t;
@@ -42,7 +42,7 @@ typedef struct xfs_attr_sf_sort {
 #define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen)	/* space name/value uses */ \
 	(((int)sizeof(xfs_attr_sf_entry_t)-1 + (nlen)+(vlen)))
 #define XFS_ATTR_SF_ENTSIZE_MAX			/* max space for name&value */ \
-	((1 << (NBBY*(int)sizeof(__uint8_t))) - 1)
+	((1 << (NBBY*(int)sizeof(uint8_t))) - 1)
 #define XFS_ATTR_SF_ENTSIZE(sfep)		/* space an entry uses */ \
 	((int)sizeof(xfs_attr_sf_entry_t)-1 + (sfep)->namelen+(sfep)->valuelen)
 #define XFS_ATTR_SF_NEXTENTRY(sfep)		/* next entry in struct */ \

diff --git a/fs/xfs/libxfs/xfs_bit.h b/fs/xfs/libxfs/xfs_bit.h
index e1649c0..61c6b20 100644
--- a/fs/xfs/libxfs/xfs_bit.h
+++ b/fs/xfs/libxfs/xfs_bit.h

@@ -25,47 +25,47 @@
 /*
  * masks with n high/low bits set, 64-bit values
  */
-static inline __uint64_t xfs_mask64hi(int n)
+static inline uint64_t xfs_mask64hi(int n)
 {
-	return (__uint64_t)-1 << (64 - (n));
+	return (uint64_t)-1 << (64 - (n));
 }
-static inline __uint32_t xfs_mask32lo(int n)
+static inline uint32_t xfs_mask32lo(int n)
 {
-	return ((__uint32_t)1 << (n)) - 1;
+	return ((uint32_t)1 << (n)) - 1;
 }
-static inline __uint64_t xfs_mask64lo(int n)
+static inline uint64_t xfs_mask64lo(int n)
 {
-	return ((__uint64_t)1 << (n)) - 1;
+	return ((uint64_t)1 << (n)) - 1;
 }
 
 /* Get high bit set out of 32-bit argument, -1 if none set */
-static inline int xfs_highbit32(__uint32_t v)
+static inline int xfs_highbit32(uint32_t v)
 {
 	return fls(v) - 1;
 }
 
 /* Get high bit set out of 64-bit argument, -1 if none set */
-static inline int xfs_highbit64(__uint64_t v)
+static inline int xfs_highbit64(uint64_t v)
 {
 	return fls64(v) - 1;
 }
 
 /* Get low bit set out of 32-bit argument, -1 if none set */
-static inline int xfs_lowbit32(__uint32_t v)
+static inline int xfs_lowbit32(uint32_t v)
 {
 	return ffs(v) - 1;
 }
 
 /* Get low bit set out of 64-bit argument, -1 if none set */
-static inline int xfs_lowbit64(__uint64_t v)
+static inline int xfs_lowbit64(uint64_t v)
 {
-	__uint32_t	w = (__uint32_t)v;
+	uint32_t	w = (uint32_t)v;
 	int		n = 0;
 
 	if (w) {	/* lower bits */
 		n = ffs(w);
 	} else {	/* upper bits */
-		w = (__uint32_t)(v >> 32);
+		w = (uint32_t)(v >> 32);
 		if (w) {
 			n = ffs(w);
 			if (n)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index a7048eaf..0a98807 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c

@@ -3992,7 +3992,7 @@ xfs_bmapi_read(
 	if (unlikely(XFS_TEST_ERROR(
 	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
-	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
 		XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
 		return -EFSCORRUPTED;
 	}
@@ -4473,7 +4473,7 @@ xfs_bmapi_write(
 	if (unlikely(XFS_TEST_ERROR(
 	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
-	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
 		XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
 		return -EFSCORRUPTED;
 	}
@@ -4694,7 +4694,7 @@ xfs_bmapi_remap(
 	if (unlikely(XFS_TEST_ERROR(
 	    (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
 	     XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
-	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
 		XFS_ERROR_REPORT("xfs_bmapi_remap", XFS_ERRLEVEL_LOW, mp);
 		return -EFSCORRUPTED;
 	}
@@ -5434,6 +5434,7 @@ __xfs_bunmapi(
 	int			whichfork;	/* data or attribute fork */
 	xfs_fsblock_t		sum;
 	xfs_filblks_t		len = *rlen;	/* length to unmap in file */
+	xfs_fileoff_t		max_len;
 
 	trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
 
@@ -5455,6 +5456,16 @@ __xfs_bunmapi(
 	ASSERT(len > 0);
 	ASSERT(nexts >= 0);
 
+	/*
+	 * Guesstimate how many blocks we can unmap without running the risk of
+	 * blowing out the transaction with a mix of EFIs and reflink
+	 * adjustments.
+	 */
+	if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
+		max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
+	else
+		max_len = len;
+
 	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
 	    (error = xfs_iread_extents(tp, ip, whichfork)))
 		return error;
@@ -5499,7 +5510,7 @@ __xfs_bunmapi(
 
 	extno = 0;
 	while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
-	       (nexts == 0 || extno < nexts)) {
+	       (nexts == 0 || extno < nexts) && max_len > 0) {
 		/*
 		 * Is the found extent after a hole in which bno lives?
 		 * Just back up to the previous extent, if so.
@@ -5531,6 +5542,15 @@ __xfs_bunmapi(
 		}
 		if (del.br_startoff + del.br_blockcount > bno + 1)
 			del.br_blockcount = bno + 1 - del.br_startoff;
+
+		/* How much can we safely unmap? */
+		if (max_len < del.br_blockcount) {
+			del.br_startoff += del.br_blockcount - max_len;
+			if (!wasdel)
+				del.br_startblock += del.br_blockcount - max_len;
+			del.br_blockcount = max_len;
+		}
+
 		sum = del.br_startblock + del.br_blockcount;
 		if (isrt &&
 		    (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
@@ -5707,6 +5727,7 @@ __xfs_bunmapi(
 		if (!isrt && wasdel)
 			xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false);
 
+		max_len -= del.br_blockcount;
 		bno = del.br_startoff - 1;
 nodelete:
 		/*
@@ -6077,7 +6098,7 @@ xfs_bmap_shift_extents(
 	if (unlikely(XFS_TEST_ERROR(
 	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
-	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
 		XFS_ERROR_REPORT("xfs_bmap_shift_extents",
 				 XFS_ERRLEVEL_LOW, mp);
 		return -EFSCORRUPTED;
@@ -6229,7 +6250,7 @@ xfs_bmap_split_extent_at(
 	if (unlikely(XFS_TEST_ERROR(
 	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
-	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
 		XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
 				 XFS_ERRLEVEL_LOW, mp);
 		return -EFSCORRUPTED;
@@ -6472,33 +6493,33 @@ xfs_bmap_finish_one(
 	int				whichfork,
 	xfs_fileoff_t			startoff,
 	xfs_fsblock_t			startblock,
-	xfs_filblks_t			blockcount,
+	xfs_filblks_t			*blockcount,
 	xfs_exntst_t			state)
 {
-	int				error = 0, done;
+	xfs_fsblock_t			firstfsb;
+	int				error = 0;
 
 	trace_xfs_bmap_deferred(tp->t_mountp,
 			XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
 			XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
-			ip->i_ino, whichfork, startoff, blockcount, state);
+			ip->i_ino, whichfork, startoff, *blockcount, state);
 
 	if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
 		return -EFSCORRUPTED;
 
 	if (XFS_TEST_ERROR(false, tp->t_mountp,
-			XFS_ERRTAG_BMAP_FINISH_ONE,
-			XFS_RANDOM_BMAP_FINISH_ONE))
+			XFS_ERRTAG_BMAP_FINISH_ONE))
 		return -EIO;
 
 	switch (type) {
 	case XFS_BMAP_MAP:
-		error = xfs_bmapi_remap(tp, ip, startoff, blockcount,
+		error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
 				startblock, dfops);
+		*blockcount = 0;
 		break;
 	case XFS_BMAP_UNMAP:
-		error = xfs_bunmapi(tp, ip, startoff, blockcount,
-				XFS_BMAPI_REMAP, 1, &startblock, dfops, &done);
-		ASSERT(done);
+		error = __xfs_bunmapi(tp, ip, startoff, blockcount,
+				XFS_BMAPI_REMAP, 1, &firstfsb, dfops);
 		break;
 	default:
 		ASSERT(0);

diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index c35a14f..851982a 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h

@@ -271,7 +271,7 @@ struct xfs_bmap_intent {
 int	xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_defer_ops *dfops,
 		struct xfs_inode *ip, enum xfs_bmap_intent_type type,
 		int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock,
-		xfs_filblks_t blockcount, xfs_exntst_t state);
+		xfs_filblks_t *blockcount, xfs_exntst_t state);
 int	xfs_bmap_map_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
 		struct xfs_inode *ip, struct xfs_bmbt_irec *imap);
 int	xfs_bmap_unmap_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,

diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 6cba69a..85de225 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c

@@ -94,8 +94,8 @@ xfs_bmdr_to_bmbt(
  */
 STATIC void
 __xfs_bmbt_get_all(
-		__uint64_t l0,
-		__uint64_t l1,
+		uint64_t l0,
+		uint64_t l1,
 		xfs_bmbt_irec_t *s)
 {
 	int	ext_flag;
@@ -573,6 +573,16 @@ xfs_bmbt_init_key_from_rec(
 }
 
 STATIC void
+xfs_bmbt_init_high_key_from_rec(
+	union xfs_btree_key	*key,
+	union xfs_btree_rec	*rec)
+{
+	key->bmbt.br_startoff = cpu_to_be64(
+			xfs_bmbt_disk_get_startoff(&rec->bmbt) +
+			xfs_bmbt_disk_get_blockcount(&rec->bmbt) - 1);
+}
+
+STATIC void
 xfs_bmbt_init_rec_from_cur(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_rec	*rec)
@@ -588,15 +598,25 @@ xfs_bmbt_init_ptr_from_cur(
 	ptr->l = 0;
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_bmbt_key_diff(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*key)
 {
-	return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) -
+	return (int64_t)be64_to_cpu(key->bmbt.br_startoff) -
 				      cur->bc_rec.b.br_startoff;
 }
 
+STATIC int64_t
+xfs_bmbt_diff_two_keys(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_key	*k1,
+	union xfs_btree_key	*k2)
+{
+	return (int64_t)be64_to_cpu(k1->bmbt.br_startoff) -
+			  be64_to_cpu(k2->bmbt.br_startoff);
+}
+
 static bool
 xfs_bmbt_verify(
 	struct xfs_buf		*bp)
@@ -687,7 +707,6 @@ const struct xfs_buf_ops xfs_bmbt_buf_ops = {
 };
 
 
-#if defined(DEBUG) || defined(XFS_WARN)
 STATIC int
 xfs_bmbt_keys_inorder(
 	struct xfs_btree_cur	*cur,
@@ -708,7 +727,6 @@ xfs_bmbt_recs_inorder(
 		xfs_bmbt_disk_get_blockcount(&r1->bmbt) <=
 		xfs_bmbt_disk_get_startoff(&r2->bmbt);
 }
-#endif	/* DEBUG */
 
 static const struct xfs_btree_ops xfs_bmbt_ops = {
 	.rec_len		= sizeof(xfs_bmbt_rec_t),
@@ -722,14 +740,14 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
 	.get_minrecs		= xfs_bmbt_get_minrecs,
 	.get_dmaxrecs		= xfs_bmbt_get_dmaxrecs,
 	.init_key_from_rec	= xfs_bmbt_init_key_from_rec,
+	.init_high_key_from_rec	= xfs_bmbt_init_high_key_from_rec,
 	.init_rec_from_cur	= xfs_bmbt_init_rec_from_cur,
 	.init_ptr_from_cur	= xfs_bmbt_init_ptr_from_cur,
 	.key_diff		= xfs_bmbt_key_diff,
+	.diff_two_keys		= xfs_bmbt_diff_two_keys,
 	.buf_ops		= &xfs_bmbt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
 	.keys_inorder		= xfs_bmbt_keys_inorder,
 	.recs_inorder		= xfs_bmbt_recs_inorder,
-#endif
 };
 
 /*

diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 3a673ba..4da85ff 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c

@@ -43,7 +43,7 @@ kmem_zone_t	*xfs_btree_cur_zone;
 /*
  * Btree magic numbers.
  */
-static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
+static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
 	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
 	  XFS_FIBT_MAGIC, 0 },
 	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
@@ -51,12 +51,12 @@ static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
 	  XFS_REFC_CRC_MAGIC }
 };
 
-__uint32_t
+uint32_t
 xfs_btree_magic(
 	int			crc,
 	xfs_btnum_t		btnum)
 {
-	__uint32_t		magic = xfs_magics[crc][btnum];
+	uint32_t		magic = xfs_magics[crc][btnum];
 
 	/* Ensure we asked for crc for crc-only magics. */
 	ASSERT(magic != 0);
@@ -101,8 +101,7 @@ xfs_btree_check_lblock(
 			be64_to_cpu(block->bb_u.l.bb_rightsib)));
 
 	if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
-			XFS_ERRTAG_BTREE_CHECK_LBLOCK,
-			XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
+			XFS_ERRTAG_BTREE_CHECK_LBLOCK))) {
 		if (bp)
 			trace_xfs_btree_corrupt(bp, _RET_IP_);
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
@@ -153,8 +152,7 @@ xfs_btree_check_sblock(
 		block->bb_u.s.bb_rightsib;
 
 	if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
-			XFS_ERRTAG_BTREE_CHECK_SBLOCK,
-			XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
+			XFS_ERRTAG_BTREE_CHECK_SBLOCK))) {
 		if (bp)
 			trace_xfs_btree_corrupt(bp, _RET_IP_);
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
@@ -568,7 +566,7 @@ xfs_btree_ptr_offset(
 /*
  * Return a pointer to the n-th record in the btree block.
  */
-STATIC union xfs_btree_rec *
+union xfs_btree_rec *
 xfs_btree_rec_addr(
 	struct xfs_btree_cur	*cur,
 	int			n,
@@ -581,7 +579,7 @@ xfs_btree_rec_addr(
 /*
  * Return a pointer to the n-th key in the btree block.
  */
-STATIC union xfs_btree_key *
+union xfs_btree_key *
 xfs_btree_key_addr(
 	struct xfs_btree_cur	*cur,
 	int			n,
@@ -594,7 +592,7 @@ xfs_btree_key_addr(
 /*
  * Return a pointer to the n-th high key in the btree block.
  */
-STATIC union xfs_btree_key *
+union xfs_btree_key *
 xfs_btree_high_key_addr(
 	struct xfs_btree_cur	*cur,
 	int			n,
@@ -607,7 +605,7 @@ xfs_btree_high_key_addr(
 /*
  * Return a pointer to the n-th block pointer in the btree block.
  */
-STATIC union xfs_btree_ptr *
+union xfs_btree_ptr *
 xfs_btree_ptr_addr(
 	struct xfs_btree_cur	*cur,
 	int			n,
@@ -641,7 +639,7 @@ xfs_btree_get_iroot(
  * Retrieve the block pointer from the cursor at the given level.
  * This may be an inode btree root or from a buffer.
  */
-STATIC struct xfs_btree_block *		/* generic btree block pointer */
+struct xfs_btree_block *		/* generic btree block pointer */
 xfs_btree_get_block(
 	struct xfs_btree_cur	*cur,	/* btree cursor */
 	int			level,	/* level in btree */
@@ -778,14 +776,14 @@ xfs_btree_lastrec(
  */
 void
 xfs_btree_offsets(
-	__int64_t	fields,		/* bitmask of fields */
+	int64_t		fields,		/* bitmask of fields */
 	const short	*offsets,	/* table of field offsets */
 	int		nbits,		/* number of bits to inspect */
 	int		*first,		/* output: first byte offset */
 	int		*last)		/* output: last byte offset */
 {
 	int		i;		/* current bit number */
-	__int64_t	imask;		/* mask for current bit number */
+	int64_t		imask;		/* mask for current bit number */
 
 	ASSERT(fields != 0);
 	/*
@@ -1756,7 +1754,7 @@ xfs_btree_decrement(
 	return error;
 }
 
-STATIC int
+int
 xfs_btree_lookup_get_block(
 	struct xfs_btree_cur	*cur,	/* btree cursor */
 	int			level,	/* level in the btree */
@@ -1846,7 +1844,7 @@ xfs_btree_lookup(
 	int			*stat)	/* success/failure */
 {
 	struct xfs_btree_block	*block;	/* current btree block */
-	__int64_t		diff;	/* difference for the current key */
+	int64_t			diff;	/* difference for the current key */
 	int			error;	/* error return value */
 	int			keyno;	/* current key number */
 	int			level;	/* level in the btree */
@@ -4435,7 +4433,7 @@ xfs_btree_visit_blocks(
  * recovery completion writes the changes to disk.
  */
 struct xfs_btree_block_change_owner_info {
-	__uint64_t		new_owner;
+	uint64_t		new_owner;
 	struct list_head	*buffer_list;
 };
 
@@ -4481,7 +4479,7 @@ xfs_btree_block_change_owner(
 int
 xfs_btree_change_owner(
 	struct xfs_btree_cur	*cur,
-	__uint64_t		new_owner,
+	uint64_t		new_owner,
 	struct list_head	*buffer_list)
 {
 	struct xfs_btree_block_change_owner_info	bbcoi;
@@ -4585,7 +4583,7 @@ xfs_btree_simple_query_range(
 {
 	union xfs_btree_rec		*recp;
 	union xfs_btree_key		rec_key;
-	__int64_t			diff;
+	int64_t				diff;
 	int				stat;
 	bool				firstrec = true;
 	int				error;
@@ -4682,8 +4680,8 @@ xfs_btree_overlapped_query_range(
 	union xfs_btree_key		*hkp;
 	union xfs_btree_rec		*recp;
 	struct xfs_btree_block		*block;
-	__int64_t			ldiff;
-	__int64_t			hdiff;
+	int64_t				ldiff;
+	int64_t				hdiff;
 	int				level;
 	struct xfs_buf			*bp;
 	int				i;
@@ -4849,12 +4847,14 @@ xfs_btree_query_all(
 	xfs_btree_query_range_fn	fn,
 	void				*priv)
 {
-	union xfs_btree_irec		low_rec;
-	union xfs_btree_irec		high_rec;
+	union xfs_btree_key		low_key;
+	union xfs_btree_key		high_key;
 
-	memset(&low_rec, 0, sizeof(low_rec));
-	memset(&high_rec, 0xFF, sizeof(high_rec));
-	return xfs_btree_query_range(cur, &low_rec, &high_rec, fn, priv);
+	memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
+	memset(&low_key, 0, sizeof(low_key));
+	memset(&high_key, 0xFF, sizeof(high_key));
+
+	return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
 }
 
 /*

diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 27bed08..9c95e96 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h

@@ -76,7 +76,7 @@ union xfs_btree_rec {
 #define	XFS_BTNUM_RMAP	((xfs_btnum_t)XFS_BTNUM_RMAPi)
 #define	XFS_BTNUM_REFC	((xfs_btnum_t)XFS_BTNUM_REFCi)
 
-__uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum);
+uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum);
 
 /*
  * For logging record fields.
@@ -150,20 +150,19 @@ struct xfs_btree_ops {
 					  union xfs_btree_rec *rec);
 
 	/* difference between key value and cursor value */
-	__int64_t (*key_diff)(struct xfs_btree_cur *cur,
+	int64_t (*key_diff)(struct xfs_btree_cur *cur,
 			      union xfs_btree_key *key);
 
 	/*
 	 * Difference between key2 and key1 -- positive if key1 > key2,
 	 * negative if key1 < key2, and zero if equal.
 	 */
-	__int64_t (*diff_two_keys)(struct xfs_btree_cur *cur,
+	int64_t (*diff_two_keys)(struct xfs_btree_cur *cur,
 				   union xfs_btree_key *key1,
 				   union xfs_btree_key *key2);
 
 	const struct xfs_buf_ops	*buf_ops;
 
-#if defined(DEBUG) || defined(XFS_WARN)
 	/* check that k1 is lower than k2 */
 	int	(*keys_inorder)(struct xfs_btree_cur *cur,
 				union xfs_btree_key *k1,
@@ -173,7 +172,6 @@ struct xfs_btree_ops {
 	int	(*recs_inorder)(struct xfs_btree_cur *cur,
 				union xfs_btree_rec *r1,
 				union xfs_btree_rec *r2);
-#endif
 };
 
 /*
@@ -213,11 +211,11 @@ typedef struct xfs_btree_cur
 	union xfs_btree_irec	bc_rec;	/* current insert/search record value */
 	struct xfs_buf	*bc_bufs[XFS_BTREE_MAXLEVELS];	/* buf ptr per level */
 	int		bc_ptrs[XFS_BTREE_MAXLEVELS];	/* key/record # */
-	__uint8_t	bc_ra[XFS_BTREE_MAXLEVELS];	/* readahead bits */
+	uint8_t		bc_ra[XFS_BTREE_MAXLEVELS];	/* readahead bits */
 #define	XFS_BTCUR_LEFTRA	1	/* left sibling has been read-ahead */
 #define	XFS_BTCUR_RIGHTRA	2	/* right sibling has been read-ahead */
-	__uint8_t	bc_nlevels;	/* number of levels in the tree */
-	__uint8_t	bc_blocklog;	/* log2(blocksize) of btree blocks */
+	uint8_t		bc_nlevels;	/* number of levels in the tree */
+	uint8_t		bc_blocklog;	/* log2(blocksize) of btree blocks */
 	xfs_btnum_t	bc_btnum;	/* identifies which btree type */
 	int		bc_statoff;	/* offset of btre stats array */
 	union {
@@ -330,7 +328,7 @@ xfs_btree_islastblock(
  */
 void
 xfs_btree_offsets(
-	__int64_t		fields,	/* bitmask of fields */
+	int64_t			fields,	/* bitmask of fields */
 	const short		*offsets,/* table of field offsets */
 	int			nbits,	/* number of bits to inspect */
 	int			*first,	/* output: first byte offset */
@@ -408,7 +406,7 @@ int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *);
 int xfs_btree_insert(struct xfs_btree_cur *, int *);
 int xfs_btree_delete(struct xfs_btree_cur *, int *);
 int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
-int xfs_btree_change_owner(struct xfs_btree_cur *cur, __uint64_t new_owner,
+int xfs_btree_change_owner(struct xfs_btree_cur *cur, uint64_t new_owner,
 			   struct list_head *buffer_list);
 
 /*
@@ -434,7 +432,7 @@ static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block)
 }
 
 static inline void xfs_btree_set_numrecs(struct xfs_btree_block *block,
-		__uint16_t numrecs)
+		uint16_t numrecs)
 {
 	block->bb_numrecs = cpu_to_be16(numrecs);
 }
@@ -506,4 +504,17 @@ int xfs_btree_visit_blocks(struct xfs_btree_cur *cur,
 
 int xfs_btree_count_blocks(struct xfs_btree_cur *cur, xfs_extlen_t *blocks);
 
+union xfs_btree_rec *xfs_btree_rec_addr(struct xfs_btree_cur *cur, int n,
+		struct xfs_btree_block *block);
+union xfs_btree_key *xfs_btree_key_addr(struct xfs_btree_cur *cur, int n,
+		struct xfs_btree_block *block);
+union xfs_btree_key *xfs_btree_high_key_addr(struct xfs_btree_cur *cur, int n,
+		struct xfs_btree_block *block);
+union xfs_btree_ptr *xfs_btree_ptr_addr(struct xfs_btree_cur *cur, int n,
+		struct xfs_btree_block *block);
+int xfs_btree_lookup_get_block(struct xfs_btree_cur *cur, int level,
+		union xfs_btree_ptr *pp, struct xfs_btree_block **blkp);
+struct xfs_btree_block *xfs_btree_get_block(struct xfs_btree_cur *cur,
+		int level, struct xfs_buf **bpp);
+
 #endif	/* __XFS_BTREE_H__ */

diff --git a/fs/xfs/libxfs/xfs_cksum.h b/fs/xfs/libxfs/xfs_cksum.h
index a416c7c..8211f48 100644
--- a/fs/xfs/libxfs/xfs_cksum.h
+++ b/fs/xfs/libxfs/xfs_cksum.h

@@ -1,7 +1,7 @@
 #ifndef _XFS_CKSUM_H
 #define _XFS_CKSUM_H 1
 
-#define XFS_CRC_SEED	(~(__uint32_t)0)
+#define XFS_CRC_SEED	(~(uint32_t)0)
 
 /*
  * Calculate the intermediate checksum for a buffer that has the CRC field
@@ -9,11 +9,11 @@
  * cksum_offset parameter. We do not modify the buffer during verification,
  * hence we have to split the CRC calculation across the cksum_offset.
  */
-static inline __uint32_t
+static inline uint32_t
 xfs_start_cksum_safe(char *buffer, size_t length, unsigned long cksum_offset)
 {
-	__uint32_t zero = 0;
-	__uint32_t crc;
+	uint32_t zero = 0;
+	uint32_t crc;
 
 	/* Calculate CRC up to the checksum. */
 	crc = crc32c(XFS_CRC_SEED, buffer, cksum_offset);
@@ -30,7 +30,7 @@ xfs_start_cksum_safe(char *buffer, size_t length, unsigned long cksum_offset)
  * Fast CRC method where the buffer is modified. Callers must have exclusive
  * access to the buffer while the calculation takes place.
  */
-static inline __uint32_t
+static inline uint32_t
 xfs_start_cksum_update(char *buffer, size_t length, unsigned long cksum_offset)
 {
 	/* zero the CRC field */
@@ -48,7 +48,7 @@ xfs_start_cksum_update(char *buffer, size_t length, unsigned long cksum_offset)
  * so that it is consistent on disk.
  */
 static inline __le32
-xfs_end_cksum(__uint32_t crc)
+xfs_end_cksum(uint32_t crc)
 {
 	return ~cpu_to_le32(crc);
 }
@@ -62,7 +62,7 @@ xfs_end_cksum(__uint32_t crc)
 static inline void
 xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset)
 {
-	__uint32_t crc = xfs_start_cksum_update(buffer, length, cksum_offset);
+	uint32_t crc = xfs_start_cksum_update(buffer, length, cksum_offset);
 
 	*(__le32 *)(buffer + cksum_offset) = xfs_end_cksum(crc);
 }
@@ -73,7 +73,7 @@ xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset)
 static inline int
 xfs_verify_cksum(char *buffer, size_t length, unsigned long cksum_offset)
 {
-	__uint32_t crc = xfs_start_cksum_safe(buffer, length, cksum_offset);
+	uint32_t crc = xfs_start_cksum_safe(buffer, length, cksum_offset);
 
 	return *(__le32 *)(buffer + cksum_offset) == xfs_end_cksum(crc);
 }

diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 1bdf288..6d43358 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c

@@ -263,7 +263,7 @@ xfs_da3_node_read(
 
 	err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
 					which_fork, &xfs_da3_node_buf_ops);
-	if (!err && tp) {
+	if (!err && tp && *bpp) {
 		struct xfs_da_blkinfo	*info = (*bpp)->b_addr;
 		int			type;
 
@@ -1282,7 +1282,7 @@ xfs_da3_fixhashpath(
 			return;
 		break;
 	case XFS_DIR2_LEAFN_MAGIC:
-		lasthash = xfs_dir2_leafn_lasthash(dp, blk->bp, &count);
+		lasthash = xfs_dir2_leaf_lasthash(dp, blk->bp, &count);
 		if (count == 0)
 			return;
 		break;
@@ -1502,8 +1502,8 @@ xfs_da3_node_lookup_int(
 		if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
 		    blk->magic == XFS_DIR3_LEAFN_MAGIC) {
 			blk->magic = XFS_DIR2_LEAFN_MAGIC;
-			blk->hashval = xfs_dir2_leafn_lasthash(args->dp,
-							       blk->bp, NULL);
+			blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
+							      blk->bp, NULL);
 			break;
 		}
 
@@ -1929,8 +1929,8 @@ xfs_da3_path_shift(
 			blk->magic = XFS_DIR2_LEAFN_MAGIC;
 			ASSERT(level == path->active-1);
 			blk->index = 0;
-			blk->hashval = xfs_dir2_leafn_lasthash(args->dp,
-							       blk->bp, NULL);
+			blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
+							      blk->bp, NULL);
 			break;
 		default:
 			ASSERT(0);
@@ -1952,7 +1952,7 @@ xfs_da3_path_shift(
  * This is implemented with some source-level loop unrolling.
  */
 xfs_dahash_t
-xfs_da_hashname(const __uint8_t *name, int namelen)
+xfs_da_hashname(const uint8_t *name, int namelen)
 {
 	xfs_dahash_t hash;
 

diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 4e29cb6..ae6de17 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h

@@ -60,10 +60,10 @@ enum xfs_dacmp {
  */
 typedef struct xfs_da_args {
 	struct xfs_da_geometry *geo;	/* da block geometry */
-	const __uint8_t	*name;		/* string (maybe not NULL terminated) */
+	const uint8_t		*name;		/* string (maybe not NULL terminated) */
 	int		namelen;	/* length of string (maybe no NULL) */
-	__uint8_t	filetype;	/* filetype of inode for directories */
-	__uint8_t	*value;		/* set of bytes (maybe contain NULLs) */
+	uint8_t		filetype;	/* filetype of inode for directories */
+	uint8_t		*value;		/* set of bytes (maybe contain NULLs) */
 	int		valuelen;	/* length of value */
 	int		flags;		/* argument flags (eg: ATTR_NOCREATE) */
 	xfs_dahash_t	hashval;	/* hash value of name */
@@ -207,7 +207,7 @@ int	xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
 int	xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
 					  struct xfs_buf *dead_buf);
 
-uint xfs_da_hashname(const __uint8_t *name_string, int name_length);
+uint xfs_da_hashname(const uint8_t *name_string, int name_length);
 enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
 				const unsigned char *name, int len);
 

diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c
index f1e8d4d..6d77d1a 100644
--- a/fs/xfs/libxfs/xfs_da_format.c
+++ b/fs/xfs/libxfs/xfs_da_format.c

@@ -49,7 +49,7 @@ xfs_dir3_sf_entsize(
 	struct xfs_dir2_sf_hdr	*hdr,
 	int			len)
 {
-	return xfs_dir2_sf_entsize(hdr, len) + sizeof(__uint8_t);
+	return xfs_dir2_sf_entsize(hdr, len) + sizeof(uint8_t);
 }
 
 static struct xfs_dir2_sf_entry *
@@ -77,7 +77,7 @@ xfs_dir3_sf_nextentry(
  * not necessary. For non-filetype enable directories, the type is always
  * unknown and we never store the value.
  */
-static __uint8_t
+static uint8_t
 xfs_dir2_sfe_get_ftype(
 	struct xfs_dir2_sf_entry *sfep)
 {
@@ -87,16 +87,16 @@ xfs_dir2_sfe_get_ftype(
 static void
 xfs_dir2_sfe_put_ftype(
 	struct xfs_dir2_sf_entry *sfep,
-	__uint8_t		ftype)
+	uint8_t			ftype)
 {
 	ASSERT(ftype < XFS_DIR3_FT_MAX);
 }
 
-static __uint8_t
+static uint8_t
 xfs_dir3_sfe_get_ftype(
 	struct xfs_dir2_sf_entry *sfep)
 {
-	__uint8_t	ftype;
+	uint8_t		ftype;
 
 	ftype = sfep->name[sfep->namelen];
 	if (ftype >= XFS_DIR3_FT_MAX)
@@ -107,7 +107,7 @@ xfs_dir3_sfe_get_ftype(
 static void
 xfs_dir3_sfe_put_ftype(
 	struct xfs_dir2_sf_entry *sfep,
-	__uint8_t		ftype)
+	uint8_t			ftype)
 {
 	ASSERT(ftype < XFS_DIR3_FT_MAX);
 
@@ -124,7 +124,7 @@ xfs_dir3_sfe_put_ftype(
 static xfs_ino_t
 xfs_dir2_sf_get_ino(
 	struct xfs_dir2_sf_hdr	*hdr,
-	__uint8_t		*from)
+	uint8_t			*from)
 {
 	if (hdr->i8count)
 		return get_unaligned_be64(from) & 0x00ffffffffffffffULL;
@@ -135,7 +135,7 @@ xfs_dir2_sf_get_ino(
 static void
 xfs_dir2_sf_put_ino(
 	struct xfs_dir2_sf_hdr	*hdr,
-	__uint8_t		*to,
+	uint8_t			*to,
 	xfs_ino_t		ino)
 {
 	ASSERT((ino & 0xff00000000000000ULL) == 0);
@@ -225,7 +225,7 @@ xfs_dir3_sfe_put_ino(
 
 #define XFS_DIR3_DATA_ENTSIZE(n)					\
 	round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) +	\
-		 sizeof(xfs_dir2_data_off_t) + sizeof(__uint8_t)),	\
+		 sizeof(xfs_dir2_data_off_t) + sizeof(uint8_t)),	\
 		XFS_DIR2_DATA_ALIGN)
 
 static int
@@ -242,7 +242,7 @@ xfs_dir3_data_entsize(
 	return XFS_DIR3_DATA_ENTSIZE(n);
 }
 
-static __uint8_t
+static uint8_t
 xfs_dir2_data_get_ftype(
 	struct xfs_dir2_data_entry *dep)
 {
@@ -252,16 +252,16 @@ xfs_dir2_data_get_ftype(
 static void
 xfs_dir2_data_put_ftype(
 	struct xfs_dir2_data_entry *dep,
-	__uint8_t		ftype)
+	uint8_t			ftype)
 {
 	ASSERT(ftype < XFS_DIR3_FT_MAX);
 }
 
-static __uint8_t
+static uint8_t
 xfs_dir3_data_get_ftype(
 	struct xfs_dir2_data_entry *dep)
 {
-	__uint8_t	ftype = dep->name[dep->namelen];
+	uint8_t		ftype = dep->name[dep->namelen];
 
 	if (ftype >= XFS_DIR3_FT_MAX)
 		return XFS_DIR3_FT_UNKNOWN;
@@ -271,7 +271,7 @@ xfs_dir3_data_get_ftype(
 static void
 xfs_dir3_data_put_ftype(
 	struct xfs_dir2_data_entry *dep,
-	__uint8_t		type)
+	uint8_t			type)
 {
 	ASSERT(type < XFS_DIR3_FT_MAX);
 	ASSERT(dep->namelen != 0);

diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 9a492a9..3771edc 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h

@@ -111,11 +111,11 @@ struct xfs_da3_intnode {
  * appropriate.
  */
 struct xfs_da3_icnode_hdr {
-	__uint32_t	forw;
-	__uint32_t	back;
-	__uint16_t	magic;
-	__uint16_t	count;
-	__uint16_t	level;
+	uint32_t	forw;
+	uint32_t	back;
+	uint16_t	magic;
+	uint16_t	count;
+	uint16_t	level;
 };
 
 /*
@@ -187,14 +187,14 @@ struct xfs_da3_icnode_hdr {
 /*
  * Byte offset in data block and shortform entry.
  */
-typedef	__uint16_t	xfs_dir2_data_off_t;
+typedef uint16_t	xfs_dir2_data_off_t;
 #define	NULLDATAOFF	0xffffU
 typedef uint		xfs_dir2_data_aoff_t;	/* argument form */
 
 /*
  * Offset in data space of a data entry.
  */
-typedef	__uint32_t	xfs_dir2_dataptr_t;
+typedef uint32_t	xfs_dir2_dataptr_t;
 #define	XFS_DIR2_MAX_DATAPTR	((xfs_dir2_dataptr_t)0xffffffff)
 #define	XFS_DIR2_NULL_DATAPTR	((xfs_dir2_dataptr_t)0)
 
@@ -206,7 +206,7 @@ typedef	xfs_off_t	xfs_dir2_off_t;
 /*
  * Directory block number (logical dirblk in file)
  */
-typedef	__uint32_t	xfs_dir2_db_t;
+typedef uint32_t	xfs_dir2_db_t;
 
 #define XFS_INO32_SIZE	4
 #define XFS_INO64_SIZE	8
@@ -226,9 +226,9 @@ typedef	__uint32_t	xfs_dir2_db_t;
  * over them.
  */
 typedef struct xfs_dir2_sf_hdr {
-	__uint8_t		count;		/* count of entries */
-	__uint8_t		i8count;	/* count of 8-byte inode #s */
-	__uint8_t		parent[8];	/* parent dir inode number */
+	uint8_t			count;		/* count of entries */
+	uint8_t			i8count;	/* count of 8-byte inode #s */
+	uint8_t			parent[8];	/* parent dir inode number */
 } __packed xfs_dir2_sf_hdr_t;
 
 typedef struct xfs_dir2_sf_entry {
@@ -447,11 +447,11 @@ struct xfs_dir3_leaf_hdr {
 };
 
 struct xfs_dir3_icleaf_hdr {
-	__uint32_t		forw;
-	__uint32_t		back;
-	__uint16_t		magic;
-	__uint16_t		count;
-	__uint16_t		stale;
+	uint32_t		forw;
+	uint32_t		back;
+	uint16_t		magic;
+	uint16_t		count;
+	uint16_t		stale;
 };
 
 /*
@@ -538,10 +538,10 @@ struct xfs_dir3_free {
  * xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk.
  */
 struct xfs_dir3_icfree_hdr {
-	__uint32_t	magic;
-	__uint32_t	firstdb;
-	__uint32_t	nvalid;
-	__uint32_t	nused;
+	uint32_t	magic;
+	uint32_t	firstdb;
+	uint32_t	nvalid;
+	uint32_t	nused;
 
 };
 
@@ -632,10 +632,10 @@ typedef struct xfs_attr_shortform {
 		__u8	padding;
 	} hdr;
 	struct xfs_attr_sf_entry {
-		__uint8_t namelen;	/* actual length of name (no NULL) */
-		__uint8_t valuelen;	/* actual length of value (no NULL) */
-		__uint8_t flags;	/* flags bits (see xfs_attr_leaf.h) */
-		__uint8_t nameval[1];	/* name & value bytes concatenated */
+		uint8_t namelen;	/* actual length of name (no NULL) */
+		uint8_t valuelen;	/* actual length of value (no NULL) */
+		uint8_t flags;	/* flags bits (see xfs_attr_leaf.h) */
+		uint8_t nameval[1];	/* name & value bytes concatenated */
 	} list[1];			/* variable sized array */
 } xfs_attr_shortform_t;
 
@@ -725,22 +725,22 @@ struct xfs_attr3_leafblock {
  * incore, neutral version of the attribute leaf header
  */
 struct xfs_attr3_icleaf_hdr {
-	__uint32_t	forw;
-	__uint32_t	back;
-	__uint16_t	magic;
-	__uint16_t	count;
-	__uint16_t	usedbytes;
+	uint32_t	forw;
+	uint32_t	back;
+	uint16_t	magic;
+	uint16_t	count;
+	uint16_t	usedbytes;
 	/*
 	 * firstused is 32-bit here instead of 16-bit like the on-disk variant
 	 * to support maximum fsb size of 64k without overflow issues throughout
 	 * the attr code. Instead, the overflow condition is handled on
 	 * conversion to/from disk.
 	 */
-	__uint32_t	firstused;
+	uint32_t	firstused;
 	__u8		holes;
 	struct {
-		__uint16_t	base;
-		__uint16_t	size;
+		uint16_t	base;
+		uint16_t	size;
 	} freemap[XFS_ATTR_LEAF_MAPSIZE];
 };
 

diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 2f389d36..ccf9783 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c

@@ -218,8 +218,7 @@ xfs_dir_ino_validate(
 		agblkno != 0 &&
 		ioff < (1 << mp->m_sb.sb_inopblog) &&
 		XFS_AGINO_TO_INO(mp, agno, agino) == ino;
-	if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
-			XFS_RANDOM_DIR_INO_VALIDATE))) {
+	if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE))) {
 		xfs_warn(mp, "Invalid inode number 0x%Lx",
 				(unsigned long long) ino);
 		XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);

diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index d6e6d9d..21c8f8b 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h

@@ -47,9 +47,9 @@ struct xfs_dir_ops {
 	struct xfs_dir2_sf_entry *
 		(*sf_nextentry)(struct xfs_dir2_sf_hdr *hdr,
 				struct xfs_dir2_sf_entry *sfep);
-	__uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep);
+	uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep);
 	void	(*sf_put_ftype)(struct xfs_dir2_sf_entry *sfep,
-				__uint8_t ftype);
+				uint8_t ftype);
 	xfs_ino_t (*sf_get_ino)(struct xfs_dir2_sf_hdr *hdr,
 				struct xfs_dir2_sf_entry *sfep);
 	void	(*sf_put_ino)(struct xfs_dir2_sf_hdr *hdr,
@@ -60,9 +60,9 @@ struct xfs_dir_ops {
 				     xfs_ino_t ino);
 
 	int	(*data_entsize)(int len);
-	__uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep);
+	uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep);
 	void	(*data_put_ftype)(struct xfs_dir2_data_entry *dep,
-				__uint8_t ftype);
+				uint8_t ftype);
 	__be16 * (*data_entry_tag_p)(struct xfs_dir2_data_entry *dep);
 	struct xfs_dir2_data_free *
 		(*data_bestfree_p)(struct xfs_dir2_data_hdr *hdr);

diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index aa17cb7..43c902f 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c

@@ -139,7 +139,7 @@ xfs_dir3_block_read(
 
 	err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp,
 				XFS_DATA_FORK, &xfs_dir3_block_buf_ops);
-	if (!err && tp)
+	if (!err && tp && *bpp)
 		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF);
 	return err;
 }

diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index b887fb2..27297a6 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c

@@ -145,7 +145,7 @@ xfs_dir3_leaf_check_int(
 static bool
 xfs_dir3_leaf_verify(
 	struct xfs_buf		*bp,
-	__uint16_t		magic)
+	uint16_t		magic)
 {
 	struct xfs_mount	*mp = bp->b_target->bt_mount;
 	struct xfs_dir2_leaf	*leaf = bp->b_addr;
@@ -154,7 +154,7 @@ xfs_dir3_leaf_verify(
 
 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
 		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
-		__uint16_t		magic3;
+		uint16_t		magic3;
 
 		magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC
 							 : XFS_DIR3_LEAFN_MAGIC;
@@ -178,7 +178,7 @@ xfs_dir3_leaf_verify(
 static void
 __read_verify(
 	struct xfs_buf  *bp,
-	__uint16_t	magic)
+	uint16_t	magic)
 {
 	struct xfs_mount	*mp = bp->b_target->bt_mount;
 
@@ -195,7 +195,7 @@ __read_verify(
 static void
 __write_verify(
 	struct xfs_buf  *bp,
-	__uint16_t	magic)
+	uint16_t	magic)
 {
 	struct xfs_mount	*mp = bp->b_target->bt_mount;
 	struct xfs_buf_log_item	*bip = bp->b_fspriv;
@@ -256,7 +256,7 @@ const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
 	.verify_write = xfs_dir3_leafn_write_verify,
 };
 
-static int
+int
 xfs_dir3_leaf_read(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*dp,
@@ -268,7 +268,7 @@ xfs_dir3_leaf_read(
 
 	err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
 				XFS_DATA_FORK, &xfs_dir3_leaf1_buf_ops);
-	if (!err && tp)
+	if (!err && tp && *bpp)
 		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF);
 	return err;
 }
@@ -285,7 +285,7 @@ xfs_dir3_leafn_read(
 
 	err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
 				XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops);
-	if (!err && tp)
+	if (!err && tp && *bpp)
 		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF);
 	return err;
 }
@@ -299,7 +299,7 @@ xfs_dir3_leaf_init(
 	struct xfs_trans	*tp,
 	struct xfs_buf		*bp,
 	xfs_ino_t		owner,
-	__uint16_t		type)
+	uint16_t		type)
 {
 	struct xfs_dir2_leaf	*leaf = bp->b_addr;
 
@@ -343,7 +343,7 @@ xfs_dir3_leaf_get_buf(
 	xfs_da_args_t		*args,
 	xfs_dir2_db_t		bno,
 	struct xfs_buf		**bpp,
-	__uint16_t		magic)
+	uint16_t		magic)
 {
 	struct xfs_inode	*dp = args->dp;
 	struct xfs_trans	*tp = args->trans;

diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index bbd1238..682e2bf 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c

@@ -528,7 +528,7 @@ xfs_dir2_free_hdr_check(
  * Stale entries are ok.
  */
 xfs_dahash_t					/* hash value */
-xfs_dir2_leafn_lasthash(
+xfs_dir2_leaf_lasthash(
 	struct xfs_inode *dp,
 	struct xfs_buf	*bp,			/* leaf buffer */
 	int		*count)			/* count of entries in leaf */
@@ -540,7 +540,9 @@ xfs_dir2_leafn_lasthash(
 	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
 
 	ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
-	       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
+	       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
+	       leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
+	       leafhdr.magic == XFS_DIR3_LEAF1_MAGIC);
 
 	if (count)
 		*count = leafhdr.count;
@@ -1405,8 +1407,8 @@ xfs_dir2_leafn_split(
 	/*
 	 * Update last hashval in each block since we added the name.
 	 */
-	oldblk->hashval = xfs_dir2_leafn_lasthash(dp, oldblk->bp, NULL);
-	newblk->hashval = xfs_dir2_leafn_lasthash(dp, newblk->bp, NULL);
+	oldblk->hashval = xfs_dir2_leaf_lasthash(dp, oldblk->bp, NULL);
+	newblk->hashval = xfs_dir2_leaf_lasthash(dp, newblk->bp, NULL);
 	xfs_dir3_leaf_check(dp, oldblk->bp);
 	xfs_dir3_leaf_check(dp, newblk->bp);
 	return error;

diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 39f8604f..4badd26 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h

@@ -58,6 +58,8 @@ extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
 		struct xfs_buf **bpp);
 
 /* xfs_dir2_leaf.c */
+extern int xfs_dir3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
+		xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
 extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
 		xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
 extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
@@ -69,7 +71,7 @@ extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,
 		struct xfs_dir2_leaf_entry *ents, int *indexp,
 		int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
 extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,
-		struct xfs_buf **bpp, __uint16_t magic);
+		struct xfs_buf **bpp, uint16_t magic);
 extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args,
 		struct xfs_buf *bp, int first, int last);
 extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args,
@@ -93,7 +95,7 @@ extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp,
 /* xfs_dir2_node.c */
 extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
 		struct xfs_buf *lbp);
-extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_inode *dp,
+extern xfs_dahash_t xfs_dir2_leaf_lasthash(struct xfs_inode *dp,
 		struct xfs_buf *bp, int *count);
 extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp,
 		struct xfs_da_args *args, int *indexp,
@@ -128,7 +130,7 @@ extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
 extern int xfs_dir2_sf_verify(struct xfs_inode *ip);
 
 /* xfs_dir2_readdir.c */
-extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
-		       size_t bufsize);
+extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
+		       struct dir_context *ctx, size_t bufsize);
 
 #endif /* __XFS_DIR2_PRIV_H__ */

diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index e84af09..be8b975 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c

@@ -647,7 +647,7 @@ xfs_dir2_sf_verify(
 	int				offset;
 	int				size;
 	int				error;
-	__uint8_t			filetype;
+	uint8_t				filetype;
 
 	ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
 	/*

diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index a1dccd8..23229f0 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h

@@ -103,8 +103,8 @@ struct xfs_ifork;
  * Must be padded to 64 bit alignment.
  */
 typedef struct xfs_sb {
-	__uint32_t	sb_magicnum;	/* magic number == XFS_SB_MAGIC */
-	__uint32_t	sb_blocksize;	/* logical block size, bytes */
+	uint32_t	sb_magicnum;	/* magic number == XFS_SB_MAGIC */
+	uint32_t	sb_blocksize;	/* logical block size, bytes */
 	xfs_rfsblock_t	sb_dblocks;	/* number of data blocks */
 	xfs_rfsblock_t	sb_rblocks;	/* number of realtime blocks */
 	xfs_rtblock_t	sb_rextents;	/* number of realtime extents */
@@ -118,45 +118,45 @@ typedef struct xfs_sb {
 	xfs_agnumber_t	sb_agcount;	/* number of allocation groups */
 	xfs_extlen_t	sb_rbmblocks;	/* number of rt bitmap blocks */
 	xfs_extlen_t	sb_logblocks;	/* number of log blocks */
-	__uint16_t	sb_versionnum;	/* header version == XFS_SB_VERSION */
-	__uint16_t	sb_sectsize;	/* volume sector size, bytes */
-	__uint16_t	sb_inodesize;	/* inode size, bytes */
-	__uint16_t	sb_inopblock;	/* inodes per block */
+	uint16_t	sb_versionnum;	/* header version == XFS_SB_VERSION */
+	uint16_t	sb_sectsize;	/* volume sector size, bytes */
+	uint16_t	sb_inodesize;	/* inode size, bytes */
+	uint16_t	sb_inopblock;	/* inodes per block */
 	char		sb_fname[12];	/* file system name */
-	__uint8_t	sb_blocklog;	/* log2 of sb_blocksize */
-	__uint8_t	sb_sectlog;	/* log2 of sb_sectsize */
-	__uint8_t	sb_inodelog;	/* log2 of sb_inodesize */
-	__uint8_t	sb_inopblog;	/* log2 of sb_inopblock */
-	__uint8_t	sb_agblklog;	/* log2 of sb_agblocks (rounded up) */
-	__uint8_t	sb_rextslog;	/* log2 of sb_rextents */
-	__uint8_t	sb_inprogress;	/* mkfs is in progress, don't mount */
-	__uint8_t	sb_imax_pct;	/* max % of fs for inode space */
+	uint8_t		sb_blocklog;	/* log2 of sb_blocksize */
+	uint8_t		sb_sectlog;	/* log2 of sb_sectsize */
+	uint8_t		sb_inodelog;	/* log2 of sb_inodesize */
+	uint8_t		sb_inopblog;	/* log2 of sb_inopblock */
+	uint8_t		sb_agblklog;	/* log2 of sb_agblocks (rounded up) */
+	uint8_t		sb_rextslog;	/* log2 of sb_rextents */
+	uint8_t		sb_inprogress;	/* mkfs is in progress, don't mount */
+	uint8_t		sb_imax_pct;	/* max % of fs for inode space */
 					/* statistics */
 	/*
 	 * These fields must remain contiguous.  If you really
 	 * want to change their layout, make sure you fix the
 	 * code in xfs_trans_apply_sb_deltas().
 	 */
-	__uint64_t	sb_icount;	/* allocated inodes */
-	__uint64_t	sb_ifree;	/* free inodes */
-	__uint64_t	sb_fdblocks;	/* free data blocks */
-	__uint64_t	sb_frextents;	/* free realtime extents */
+	uint64_t	sb_icount;	/* allocated inodes */
+	uint64_t	sb_ifree;	/* free inodes */
+	uint64_t	sb_fdblocks;	/* free data blocks */
+	uint64_t	sb_frextents;	/* free realtime extents */
 	/*
 	 * End contiguous fields.
 	 */
 	xfs_ino_t	sb_uquotino;	/* user quota inode */
 	xfs_ino_t	sb_gquotino;	/* group quota inode */
-	__uint16_t	sb_qflags;	/* quota flags */
-	__uint8_t	sb_flags;	/* misc. flags */
-	__uint8_t	sb_shared_vn;	/* shared version number */
+	uint16_t	sb_qflags;	/* quota flags */
+	uint8_t		sb_flags;	/* misc. flags */
+	uint8_t		sb_shared_vn;	/* shared version number */
 	xfs_extlen_t	sb_inoalignmt;	/* inode chunk alignment, fsblocks */
-	__uint32_t	sb_unit;	/* stripe or raid unit */
-	__uint32_t	sb_width;	/* stripe or raid width */
-	__uint8_t	sb_dirblklog;	/* log2 of dir block size (fsbs) */
-	__uint8_t	sb_logsectlog;	/* log2 of the log sector size */
-	__uint16_t	sb_logsectsize;	/* sector size for the log, bytes */
-	__uint32_t	sb_logsunit;	/* stripe unit size for the log */
-	__uint32_t	sb_features2;	/* additional feature bits */
+	uint32_t	sb_unit;	/* stripe or raid unit */
+	uint32_t	sb_width;	/* stripe or raid width */
+	uint8_t		sb_dirblklog;	/* log2 of dir block size (fsbs) */
+	uint8_t		sb_logsectlog;	/* log2 of the log sector size */
+	uint16_t	sb_logsectsize;	/* sector size for the log, bytes */
+	uint32_t	sb_logsunit;	/* stripe unit size for the log */
+	uint32_t	sb_features2;	/* additional feature bits */
 
 	/*
 	 * bad features2 field as a result of failing to pad the sb structure to
@@ -167,17 +167,17 @@ typedef struct xfs_sb {
 	 * the value in sb_features2 when formatting the incore superblock to
 	 * the disk buffer.
 	 */
-	__uint32_t	sb_bad_features2;
+	uint32_t	sb_bad_features2;
 
 	/* version 5 superblock fields start here */
 
 	/* feature masks */
-	__uint32_t	sb_features_compat;
-	__uint32_t	sb_features_ro_compat;
-	__uint32_t	sb_features_incompat;
-	__uint32_t	sb_features_log_incompat;
+	uint32_t	sb_features_compat;
+	uint32_t	sb_features_ro_compat;
+	uint32_t	sb_features_incompat;
+	uint32_t	sb_features_log_incompat;
 
-	__uint32_t	sb_crc;		/* superblock crc */
+	uint32_t	sb_crc;		/* superblock crc */
 	xfs_extlen_t	sb_spino_align;	/* sparse inode chunk alignment */
 
 	xfs_ino_t	sb_pquotino;	/* project quota inode */
@@ -449,7 +449,7 @@ static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp)
 static inline bool
 xfs_sb_has_compat_feature(
 	struct xfs_sb	*sbp,
-	__uint32_t	feature)
+	uint32_t	feature)
 {
 	return (sbp->sb_features_compat & feature) != 0;
 }
@@ -465,7 +465,7 @@ xfs_sb_has_compat_feature(
 static inline bool
 xfs_sb_has_ro_compat_feature(
 	struct xfs_sb	*sbp,
-	__uint32_t	feature)
+	uint32_t	feature)
 {
 	return (sbp->sb_features_ro_compat & feature) != 0;
 }
@@ -482,7 +482,7 @@ xfs_sb_has_ro_compat_feature(
 static inline bool
 xfs_sb_has_incompat_feature(
 	struct xfs_sb	*sbp,
-	__uint32_t	feature)
+	uint32_t	feature)
 {
 	return (sbp->sb_features_incompat & feature) != 0;
 }
@@ -492,7 +492,7 @@ xfs_sb_has_incompat_feature(
 static inline bool
 xfs_sb_has_incompat_log_feature(
 	struct xfs_sb	*sbp,
-	__uint32_t	feature)
+	uint32_t	feature)
 {
 	return (sbp->sb_features_log_incompat & feature) != 0;
 }
@@ -594,8 +594,8 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
  */
 #define XFS_FSB_TO_B(mp,fsbno)	((xfs_fsize_t)(fsbno) << (mp)->m_sb.sb_blocklog)
 #define XFS_B_TO_FSB(mp,b)	\
-	((((__uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog)
-#define XFS_B_TO_FSBT(mp,b)	(((__uint64_t)(b)) >> (mp)->m_sb.sb_blocklog)
+	((((uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog)
+#define XFS_B_TO_FSBT(mp,b)	(((uint64_t)(b)) >> (mp)->m_sb.sb_blocklog)
 #define XFS_B_FSB_OFFSET(mp,b)	((b) & (mp)->m_blockmask)
 
 /*
@@ -1072,7 +1072,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
  * next agno_log bits - ag number
  * high agno_log-agblklog-inopblog bits - 0
  */
-#define	XFS_INO_MASK(k)			(__uint32_t)((1ULL << (k)) - 1)
+#define	XFS_INO_MASK(k)			(uint32_t)((1ULL << (k)) - 1)
 #define	XFS_INO_OFFSET_BITS(mp)		(mp)->m_sb.sb_inopblog
 #define	XFS_INO_AGBNO_BITS(mp)		(mp)->m_sb.sb_agblklog
 #define	XFS_INO_AGINO_BITS(mp)		(mp)->m_agino_log
@@ -1211,6 +1211,7 @@ struct xfs_dsymlink_hdr {
 
 #define XFS_SYMLINK_CRC_OFF	offsetof(struct xfs_dsymlink_hdr, sl_crc)
 
+#define XFS_SYMLINK_MAXLEN	1024
 /*
  * The maximum pathlen is 1024 bytes. Since the minimum file system
  * blocksize is 512 bytes, we can get a max of 3 extents back from
@@ -1269,16 +1270,16 @@ typedef __be32 xfs_alloc_ptr_t;
 #define	XFS_FIBT_MAGIC		0x46494254	/* 'FIBT' */
 #define	XFS_FIBT_CRC_MAGIC	0x46494233	/* 'FIB3' */
 
-typedef	__uint64_t	xfs_inofree_t;
+typedef uint64_t	xfs_inofree_t;
 #define	XFS_INODES_PER_CHUNK		(NBBY * sizeof(xfs_inofree_t))
 #define	XFS_INODES_PER_CHUNK_LOG	(XFS_NBBYLOG + 3)
 #define	XFS_INOBT_ALL_FREE		((xfs_inofree_t)-1)
 #define	XFS_INOBT_MASK(i)		((xfs_inofree_t)1 << (i))
 
 #define XFS_INOBT_HOLEMASK_FULL		0	/* holemask for full chunk */
-#define XFS_INOBT_HOLEMASK_BITS		(NBBY * sizeof(__uint16_t))
+#define XFS_INOBT_HOLEMASK_BITS		(NBBY * sizeof(uint16_t))
 #define XFS_INODES_PER_HOLEMASK_BIT	\
-	(XFS_INODES_PER_CHUNK / (NBBY * sizeof(__uint16_t)))
+	(XFS_INODES_PER_CHUNK / (NBBY * sizeof(uint16_t)))
 
 static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
 {
@@ -1312,9 +1313,9 @@ typedef struct xfs_inobt_rec {
 
 typedef struct xfs_inobt_rec_incore {
 	xfs_agino_t	ir_startino;	/* starting inode number */
-	__uint16_t	ir_holemask;	/* hole mask for sparse chunks */
-	__uint8_t	ir_count;	/* total inode count */
-	__uint8_t	ir_freecount;	/* count of free inodes (set bits) */
+	uint16_t	ir_holemask;	/* hole mask for sparse chunks */
+	uint8_t		ir_count;	/* total inode count */
+	uint8_t		ir_freecount;	/* count of free inodes (set bits) */
 	xfs_inofree_t	ir_free;	/* free inode mask */
 } xfs_inobt_rec_incore_t;
 
@@ -1397,15 +1398,15 @@ struct xfs_rmap_rec {
  *  rm_offset:54-60 aren't used and should be zero
  *  rm_offset:0-53 is the block offset within the inode
  */
-#define XFS_RMAP_OFF_ATTR_FORK	((__uint64_t)1ULL << 63)
-#define XFS_RMAP_OFF_BMBT_BLOCK	((__uint64_t)1ULL << 62)
-#define XFS_RMAP_OFF_UNWRITTEN	((__uint64_t)1ULL << 61)
+#define XFS_RMAP_OFF_ATTR_FORK	((uint64_t)1ULL << 63)
+#define XFS_RMAP_OFF_BMBT_BLOCK	((uint64_t)1ULL << 62)
+#define XFS_RMAP_OFF_UNWRITTEN	((uint64_t)1ULL << 61)
 
-#define XFS_RMAP_LEN_MAX	((__uint32_t)~0U)
+#define XFS_RMAP_LEN_MAX	((uint32_t)~0U)
 #define XFS_RMAP_OFF_FLAGS	(XFS_RMAP_OFF_ATTR_FORK | \
 				 XFS_RMAP_OFF_BMBT_BLOCK | \
 				 XFS_RMAP_OFF_UNWRITTEN)
-#define XFS_RMAP_OFF_MASK	((__uint64_t)0x3FFFFFFFFFFFFFULL)
+#define XFS_RMAP_OFF_MASK	((uint64_t)0x3FFFFFFFFFFFFFULL)
 
 #define XFS_RMAP_OFF(off)		((off) & XFS_RMAP_OFF_MASK)
 
@@ -1431,8 +1432,8 @@ struct xfs_rmap_rec {
 struct xfs_rmap_irec {
 	xfs_agblock_t	rm_startblock;	/* extent start block */
 	xfs_extlen_t	rm_blockcount;	/* extent length */
-	__uint64_t	rm_owner;	/* extent owner */
-	__uint64_t	rm_offset;	/* offset within the owner */
+	uint64_t	rm_owner;	/* extent owner */
+	uint64_t	rm_offset;	/* offset within the owner */
 	unsigned int	rm_flags;	/* state flags */
 };
 
@@ -1544,11 +1545,11 @@ typedef struct xfs_bmbt_rec {
 	__be64			l0, l1;
 } xfs_bmbt_rec_t;
 
-typedef __uint64_t	xfs_bmbt_rec_base_t;	/* use this for casts */
+typedef uint64_t	xfs_bmbt_rec_base_t;	/* use this for casts */
 typedef xfs_bmbt_rec_t xfs_bmdr_rec_t;
 
 typedef struct xfs_bmbt_rec_host {
-	__uint64_t		l0, l1;
+	uint64_t		l0, l1;
 } xfs_bmbt_rec_host_t;
 
 /*

diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 095bdf0..8c61f21 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h

@@ -302,10 +302,10 @@ typedef struct xfs_bstat {
  * and using two 16bit values to hold new 32bit projid was choosen
  * to retain compatibility with "old" filesystems).
  */
-static inline __uint32_t
+static inline uint32_t
 bstat_get_projid(struct xfs_bstat *bs)
 {
-	return (__uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo;
+	return (uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo;
 }
 
 /*
@@ -446,19 +446,15 @@ typedef struct xfs_handle {
 } xfs_handle_t;
 #define ha_fsid ha_u._ha_fsid
 
-#define XFS_HSIZE(handle)	(((char *) &(handle).ha_fid.fid_pad	 \
-				 - (char *) &(handle))			  \
-				 + (handle).ha_fid.fid_len)
-
 /*
  * Structure passed to XFS_IOC_SWAPEXT
  */
 typedef struct xfs_swapext
 {
-	__int64_t	sx_version;	/* version */
+	int64_t		sx_version;	/* version */
 #define XFS_SX_VERSION		0
-	__int64_t	sx_fdtarget;	/* fd of target file */
-	__int64_t	sx_fdtmp;	/* fd of tmp file */
+	int64_t		sx_fdtarget;	/* fd of target file */
+	int64_t		sx_fdtmp;	/* fd of tmp file */
 	xfs_off_t	sx_offset;	/* offset into file */
 	xfs_off_t	sx_length;	/* leng from offset */
 	char		sx_pad[16];	/* pad space, unused */
@@ -546,7 +542,7 @@ typedef struct xfs_swapext
 #define XFS_IOC_ATTRLIST_BY_HANDLE   _IOW ('X', 122, struct xfs_fsop_attrlist_handlereq)
 #define XFS_IOC_ATTRMULTI_BY_HANDLE  _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq)
 #define XFS_IOC_FSGEOMETRY	     _IOR ('X', 124, struct xfs_fsop_geom)
-#define XFS_IOC_GOINGDOWN	     _IOR ('X', 125, __uint32_t)
+#define XFS_IOC_GOINGDOWN	     _IOR ('X', 125, uint32_t)
 /*	XFS_IOC_GETFSUUID ---------- deprecated 140	 */
 
 

diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index d41ade5..ffd5a15 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c

@@ -46,7 +46,7 @@
 /*
  * Allocation group level functions.
  */
-static inline int
+int
 xfs_ialloc_cluster_alignment(
 	struct xfs_mount	*mp)
 {
@@ -98,24 +98,15 @@ xfs_inobt_update(
 	return xfs_btree_update(cur, &rec);
 }
 
-/*
- * Get the data from the pointed-to record.
- */
-int					/* error */
-xfs_inobt_get_rec(
-	struct xfs_btree_cur	*cur,	/* btree cursor */
-	xfs_inobt_rec_incore_t	*irec,	/* btree record */
-	int			*stat)	/* output: success/failure */
+/* Convert on-disk btree record to incore inobt record. */
+void
+xfs_inobt_btrec_to_irec(
+	struct xfs_mount		*mp,
+	union xfs_btree_rec		*rec,
+	struct xfs_inobt_rec_incore	*irec)
 {
-	union xfs_btree_rec	*rec;
-	int			error;
-
-	error = xfs_btree_get_rec(cur, &rec, stat);
-	if (error || *stat == 0)
-		return error;
-
 	irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
-	if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
+	if (xfs_sb_version_hassparseinodes(&mp->m_sb)) {
 		irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
 		irec->ir_count = rec->inobt.ir_u.sp.ir_count;
 		irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
@@ -130,6 +121,25 @@ xfs_inobt_get_rec(
 				be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);
 	}
 	irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int
+xfs_inobt_get_rec(
+	struct xfs_btree_cur		*cur,
+	struct xfs_inobt_rec_incore	*irec,
+	int				*stat)
+{
+	union xfs_btree_rec		*rec;
+	int				error;
+
+	error = xfs_btree_get_rec(cur, &rec, stat);
+	if (error || *stat == 0)
+		return error;
+
+	xfs_inobt_btrec_to_irec(cur->bc_mp, rec, irec);
 
 	return 0;
 }
@@ -140,9 +150,9 @@ xfs_inobt_get_rec(
 STATIC int
 xfs_inobt_insert_rec(
 	struct xfs_btree_cur	*cur,
-	__uint16_t		holemask,
-	__uint8_t		count,
-	__int32_t		freecount,
+	uint16_t		holemask,
+	uint8_t			count,
+	int32_t			freecount,
 	xfs_inofree_t		free,
 	int			*stat)
 {
@@ -2542,8 +2552,7 @@ xfs_agi_read_verify(
 	    !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
 		xfs_buf_ioerror(bp, -EFSBADCRC);
 	else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
-				XFS_ERRTAG_IALLOC_READ_AGI,
-				XFS_RANDOM_IALLOC_READ_AGI))
+				XFS_ERRTAG_IALLOC_READ_AGI))
 		xfs_buf_ioerror(bp, -EFSCORRUPTED);
 
 	if (bp->b_error)

diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 0bb8966..b32cfb5 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h

@@ -168,5 +168,10 @@ int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
 int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
 		xfs_agnumber_t agno, struct xfs_buf **bpp);
 
+union xfs_btree_rec;
+void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, union xfs_btree_rec *rec,
+		struct xfs_inobt_rec_incore *irec);
+
+int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
 
 #endif	/* __XFS_IALLOC_H__ */

diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 7c47188..317caba 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c

@@ -175,6 +175,18 @@ xfs_inobt_init_key_from_rec(
 }
 
 STATIC void
+xfs_inobt_init_high_key_from_rec(
+	union xfs_btree_key	*key,
+	union xfs_btree_rec	*rec)
+{
+	__u32			x;
+
+	x = be32_to_cpu(rec->inobt.ir_startino);
+	x += XFS_INODES_PER_CHUNK - 1;
+	key->inobt.ir_startino = cpu_to_be32(x);
+}
+
+STATIC void
 xfs_inobt_init_rec_from_cur(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_rec	*rec)
@@ -219,15 +231,25 @@ xfs_finobt_init_ptr_from_cur(
 	ptr->s = agi->agi_free_root;
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_inobt_key_diff(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*key)
 {
-	return (__int64_t)be32_to_cpu(key->inobt.ir_startino) -
+	return (int64_t)be32_to_cpu(key->inobt.ir_startino) -
 			  cur->bc_rec.i.ir_startino;
 }
 
+STATIC int64_t
+xfs_inobt_diff_two_keys(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_key	*k1,
+	union xfs_btree_key	*k2)
+{
+	return (int64_t)be32_to_cpu(k1->inobt.ir_startino) -
+			  be32_to_cpu(k2->inobt.ir_startino);
+}
+
 static int
 xfs_inobt_verify(
 	struct xfs_buf		*bp)
@@ -302,7 +324,6 @@ const struct xfs_buf_ops xfs_inobt_buf_ops = {
 	.verify_write = xfs_inobt_write_verify,
 };
 
-#if defined(DEBUG) || defined(XFS_WARN)
 STATIC int
 xfs_inobt_keys_inorder(
 	struct xfs_btree_cur	*cur,
@@ -322,7 +343,6 @@ xfs_inobt_recs_inorder(
 	return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <=
 		be32_to_cpu(r2->inobt.ir_startino);
 }
-#endif	/* DEBUG */
 
 static const struct xfs_btree_ops xfs_inobt_ops = {
 	.rec_len		= sizeof(xfs_inobt_rec_t),
@@ -335,14 +355,14 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
 	.get_minrecs		= xfs_inobt_get_minrecs,
 	.get_maxrecs		= xfs_inobt_get_maxrecs,
 	.init_key_from_rec	= xfs_inobt_init_key_from_rec,
+	.init_high_key_from_rec	= xfs_inobt_init_high_key_from_rec,
 	.init_rec_from_cur	= xfs_inobt_init_rec_from_cur,
 	.init_ptr_from_cur	= xfs_inobt_init_ptr_from_cur,
 	.key_diff		= xfs_inobt_key_diff,
 	.buf_ops		= &xfs_inobt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
+	.diff_two_keys		= xfs_inobt_diff_two_keys,
 	.keys_inorder		= xfs_inobt_keys_inorder,
 	.recs_inorder		= xfs_inobt_recs_inorder,
-#endif
 };
 
 static const struct xfs_btree_ops xfs_finobt_ops = {
@@ -356,14 +376,14 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
 	.get_minrecs		= xfs_inobt_get_minrecs,
 	.get_maxrecs		= xfs_inobt_get_maxrecs,
 	.init_key_from_rec	= xfs_inobt_init_key_from_rec,
+	.init_high_key_from_rec	= xfs_inobt_init_high_key_from_rec,
 	.init_rec_from_cur	= xfs_inobt_init_rec_from_cur,
 	.init_ptr_from_cur	= xfs_finobt_init_ptr_from_cur,
 	.key_diff		= xfs_inobt_key_diff,
 	.buf_ops		= &xfs_inobt_buf_ops,
-#if defined(DEBUG) || defined(XFS_WARN)
+	.diff_two_keys		= xfs_inobt_diff_two_keys,
 	.keys_inorder		= xfs_inobt_keys_inorder,
 	.recs_inorder		= xfs_inobt_recs_inorder,
-#endif
 };
 
 /*

diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 09c3d1a..378f8fb 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c

@@ -105,8 +105,7 @@ xfs_inode_buf_verify(
 		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
 			xfs_dinode_good_version(mp, dip->di_version);
 		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
-						XFS_ERRTAG_ITOBP_INOTOBP,
-						XFS_RANDOM_ITOBP_INOTOBP))) {
+						XFS_ERRTAG_ITOBP_INOTOBP))) {
 			if (readahead) {
 				bp->b_flags &= ~XBF_DONE;
 				xfs_buf_ioerror(bp, -EIO);
@@ -381,7 +380,7 @@ xfs_log_dinode_to_disk(
 	}
 }
 
-static bool
+bool
 xfs_dinode_verify(
 	struct xfs_mount	*mp,
 	xfs_ino_t		ino,
@@ -444,7 +443,7 @@ xfs_dinode_calc_crc(
 	struct xfs_mount	*mp,
 	struct xfs_dinode	*dip)
 {
-	__uint32_t		crc;
+	uint32_t		crc;
 
 	if (dip->di_version < 3)
 		return;

diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 6848a0a..a9c97a3 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h

@@ -28,26 +28,26 @@ struct xfs_dinode;
  * format specific structures at the appropriate time.
  */
 struct xfs_icdinode {
-	__int8_t	di_version;	/* inode version */
-	__int8_t	di_format;	/* format of di_c data */
-	__uint16_t	di_flushiter;	/* incremented on flush */
-	__uint32_t	di_uid;		/* owner's user id */
-	__uint32_t	di_gid;		/* owner's group id */
-	__uint16_t	di_projid_lo;	/* lower part of owner's project id */
-	__uint16_t	di_projid_hi;	/* higher part of owner's project id */
+	int8_t		di_version;	/* inode version */
+	int8_t		di_format;	/* format of di_c data */
+	uint16_t	di_flushiter;	/* incremented on flush */
+	uint32_t	di_uid;		/* owner's user id */
+	uint32_t	di_gid;		/* owner's group id */
+	uint16_t	di_projid_lo;	/* lower part of owner's project id */
+	uint16_t	di_projid_hi;	/* higher part of owner's project id */
 	xfs_fsize_t	di_size;	/* number of bytes in file */
 	xfs_rfsblock_t	di_nblocks;	/* # of direct & btree blocks used */
 	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
 	xfs_extnum_t	di_nextents;	/* number of extents in data fork */
 	xfs_aextnum_t	di_anextents;	/* number of extents in attribute fork*/
-	__uint8_t	di_forkoff;	/* attr fork offs, <<3 for 64b align */
-	__int8_t	di_aformat;	/* format of attr fork's data */
-	__uint32_t	di_dmevmask;	/* DMIG event mask */
-	__uint16_t	di_dmstate;	/* DMIG state info */
-	__uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
+	uint8_t		di_forkoff;	/* attr fork offs, <<3 for 64b align */
+	int8_t		di_aformat;	/* format of attr fork's data */
+	uint32_t	di_dmevmask;	/* DMIG event mask */
+	uint16_t	di_dmstate;	/* DMIG state info */
+	uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
 
-	__uint64_t	di_flags2;	/* more random flags */
-	__uint32_t	di_cowextsize;	/* basic cow extent size for file */
+	uint64_t	di_flags2;	/* more random flags */
+	uint32_t	di_cowextsize;	/* basic cow extent size for file */
 
 	xfs_ictimestamp_t di_crtime;	/* time created */
 };
@@ -82,4 +82,7 @@ void	xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
 #define	xfs_inobp_check(mp, bp)
 #endif /* DEBUG */
 
+bool	xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
+			  struct xfs_dinode *dip);
+
 #endif	/* __XFS_INODE_BUF_H__ */

diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 7ae571f..8372e9b 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h

@@ -31,7 +31,7 @@ struct xfs_trans_res;
  * through all the log items definitions and everything they encode into the
  * log.
  */
-typedef __uint32_t xlog_tid_t;
+typedef uint32_t xlog_tid_t;
 
 #define XLOG_MIN_ICLOGS		2
 #define XLOG_MAX_ICLOGS		8
@@ -211,7 +211,7 @@ typedef struct xfs_log_iovec {
 typedef struct xfs_trans_header {
 	uint		th_magic;		/* magic number */
 	uint		th_type;		/* transaction type */
-	__int32_t	th_tid;			/* transaction id (unused) */
+	int32_t		th_tid;			/* transaction id (unused) */
 	uint		th_num_items;		/* num items logged by trans */
 } xfs_trans_header_t;
 
@@ -265,52 +265,52 @@ typedef struct xfs_trans_header {
  * must be added on to the end.
  */
 typedef struct xfs_inode_log_format {
-	__uint16_t		ilf_type;	/* inode log item type */
-	__uint16_t		ilf_size;	/* size of this item */
-	__uint32_t		ilf_fields;	/* flags for fields logged */
-	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
-	__uint16_t		ilf_dsize;	/* size of data/ext/root */
-	__uint64_t		ilf_ino;	/* inode number */
+	uint16_t		ilf_type;	/* inode log item type */
+	uint16_t		ilf_size;	/* size of this item */
+	uint32_t		ilf_fields;	/* flags for fields logged */
+	uint16_t		ilf_asize;	/* size of attr d/ext/root */
+	uint16_t		ilf_dsize;	/* size of data/ext/root */
+	uint64_t		ilf_ino;	/* inode number */
 	union {
-		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
+		uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
 		uuid_t		ilfu_uuid;	/* mount point value */
 	} ilf_u;
-	__int64_t		ilf_blkno;	/* blkno of inode buffer */
-	__int32_t		ilf_len;	/* len of inode buffer */
-	__int32_t		ilf_boffset;	/* off of inode in buffer */
+	int64_t			ilf_blkno;	/* blkno of inode buffer */
+	int32_t			ilf_len;	/* len of inode buffer */
+	int32_t			ilf_boffset;	/* off of inode in buffer */
 } xfs_inode_log_format_t;
 
 typedef struct xfs_inode_log_format_32 {
-	__uint16_t		ilf_type;	/* inode log item type */
-	__uint16_t		ilf_size;	/* size of this item */
-	__uint32_t		ilf_fields;	/* flags for fields logged */
-	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
-	__uint16_t		ilf_dsize;	/* size of data/ext/root */
-	__uint64_t		ilf_ino;	/* inode number */
+	uint16_t		ilf_type;	/* inode log item type */
+	uint16_t		ilf_size;	/* size of this item */
+	uint32_t		ilf_fields;	/* flags for fields logged */
+	uint16_t		ilf_asize;	/* size of attr d/ext/root */
+	uint16_t		ilf_dsize;	/* size of data/ext/root */
+	uint64_t		ilf_ino;	/* inode number */
 	union {
-		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
+		uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
 		uuid_t		ilfu_uuid;	/* mount point value */
 	} ilf_u;
-	__int64_t		ilf_blkno;	/* blkno of inode buffer */
-	__int32_t		ilf_len;	/* len of inode buffer */
-	__int32_t		ilf_boffset;	/* off of inode in buffer */
+	int64_t			ilf_blkno;	/* blkno of inode buffer */
+	int32_t			ilf_len;	/* len of inode buffer */
+	int32_t			ilf_boffset;	/* off of inode in buffer */
 } __attribute__((packed)) xfs_inode_log_format_32_t;
 
 typedef struct xfs_inode_log_format_64 {
-	__uint16_t		ilf_type;	/* inode log item type */
-	__uint16_t		ilf_size;	/* size of this item */
-	__uint32_t		ilf_fields;	/* flags for fields logged */
-	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
-	__uint16_t		ilf_dsize;	/* size of data/ext/root */
-	__uint32_t		ilf_pad;	/* pad for 64 bit boundary */
-	__uint64_t		ilf_ino;	/* inode number */
+	uint16_t		ilf_type;	/* inode log item type */
+	uint16_t		ilf_size;	/* size of this item */
+	uint32_t		ilf_fields;	/* flags for fields logged */
+	uint16_t		ilf_asize;	/* size of attr d/ext/root */
+	uint16_t		ilf_dsize;	/* size of data/ext/root */
+	uint32_t		ilf_pad;	/* pad for 64 bit boundary */
+	uint64_t		ilf_ino;	/* inode number */
 	union {
-		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
+		uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
 		uuid_t		ilfu_uuid;	/* mount point value */
 	} ilf_u;
-	__int64_t		ilf_blkno;	/* blkno of inode buffer */
-	__int32_t		ilf_len;	/* len of inode buffer */
-	__int32_t		ilf_boffset;	/* off of inode in buffer */
+	int64_t			ilf_blkno;	/* blkno of inode buffer */
+	int32_t			ilf_len;	/* len of inode buffer */
+	int32_t			ilf_boffset;	/* off of inode in buffer */
 } xfs_inode_log_format_64_t;
 
 
@@ -379,8 +379,8 @@ static inline int xfs_ilog_fdata(int w)
  * information.
  */
 typedef struct xfs_ictimestamp {
-	__int32_t	t_sec;		/* timestamp seconds */
-	__int32_t	t_nsec;		/* timestamp nanoseconds */
+	int32_t		t_sec;		/* timestamp seconds */
+	int32_t		t_nsec;		/* timestamp nanoseconds */
 } xfs_ictimestamp_t;
 
 /*
@@ -388,18 +388,18 @@ typedef struct xfs_ictimestamp {
  * kept identical to struct xfs_dinode except for the endianness annotations.
  */
 struct xfs_log_dinode {
-	__uint16_t	di_magic;	/* inode magic # = XFS_DINODE_MAGIC */
-	__uint16_t	di_mode;	/* mode and type of file */
-	__int8_t	di_version;	/* inode version */
-	__int8_t	di_format;	/* format of di_c data */
-	__uint8_t	di_pad3[2];	/* unused in v2/3 inodes */
-	__uint32_t	di_uid;		/* owner's user id */
-	__uint32_t	di_gid;		/* owner's group id */
-	__uint32_t	di_nlink;	/* number of links to file */
-	__uint16_t	di_projid_lo;	/* lower part of owner's project id */
-	__uint16_t	di_projid_hi;	/* higher part of owner's project id */
-	__uint8_t	di_pad[6];	/* unused, zeroed space */
-	__uint16_t	di_flushiter;	/* incremented on flush */
+	uint16_t	di_magic;	/* inode magic # = XFS_DINODE_MAGIC */
+	uint16_t	di_mode;	/* mode and type of file */
+	int8_t		di_version;	/* inode version */
+	int8_t		di_format;	/* format of di_c data */
+	uint8_t		di_pad3[2];	/* unused in v2/3 inodes */
+	uint32_t	di_uid;		/* owner's user id */
+	uint32_t	di_gid;		/* owner's group id */
+	uint32_t	di_nlink;	/* number of links to file */
+	uint16_t	di_projid_lo;	/* lower part of owner's project id */
+	uint16_t	di_projid_hi;	/* higher part of owner's project id */
+	uint8_t		di_pad[6];	/* unused, zeroed space */
+	uint16_t	di_flushiter;	/* incremented on flush */
 	xfs_ictimestamp_t di_atime;	/* time last accessed */
 	xfs_ictimestamp_t di_mtime;	/* time last modified */
 	xfs_ictimestamp_t di_ctime;	/* time created/inode modified */
@@ -408,23 +408,23 @@ struct xfs_log_dinode {
 	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
 	xfs_extnum_t	di_nextents;	/* number of extents in data fork */
 	xfs_aextnum_t	di_anextents;	/* number of extents in attribute fork*/
-	__uint8_t	di_forkoff;	/* attr fork offs, <<3 for 64b align */
-	__int8_t	di_aformat;	/* format of attr fork's data */
-	__uint32_t	di_dmevmask;	/* DMIG event mask */
-	__uint16_t	di_dmstate;	/* DMIG state info */
-	__uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
-	__uint32_t	di_gen;		/* generation number */
+	uint8_t		di_forkoff;	/* attr fork offs, <<3 for 64b align */
+	int8_t		di_aformat;	/* format of attr fork's data */
+	uint32_t	di_dmevmask;	/* DMIG event mask */
+	uint16_t	di_dmstate;	/* DMIG state info */
+	uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
+	uint32_t	di_gen;		/* generation number */
 
 	/* di_next_unlinked is the only non-core field in the old dinode */
 	xfs_agino_t	di_next_unlinked;/* agi unlinked list ptr */
 
 	/* start of the extended dinode, writable fields */
-	__uint32_t	di_crc;		/* CRC of the inode */
-	__uint64_t	di_changecount;	/* number of attribute changes */
+	uint32_t	di_crc;		/* CRC of the inode */
+	uint64_t	di_changecount;	/* number of attribute changes */
 	xfs_lsn_t	di_lsn;		/* flush sequence */
-	__uint64_t	di_flags2;	/* more random flags */
-	__uint32_t	di_cowextsize;	/* basic cow extent size for file */
-	__uint8_t	di_pad2[12];	/* more padding for future expansion */
+	uint64_t	di_flags2;	/* more random flags */
+	uint32_t	di_cowextsize;	/* basic cow extent size for file */
+	uint8_t		di_pad2[12];	/* more padding for future expansion */
 
 	/* fields only written to during inode creation */
 	xfs_ictimestamp_t di_crtime;	/* time created */
@@ -483,7 +483,7 @@ typedef struct xfs_buf_log_format {
 	unsigned short	blf_size;	/* size of this item */
 	unsigned short	blf_flags;	/* misc state */
 	unsigned short	blf_len;	/* number of blocks in this buf */
-	__int64_t	blf_blkno;	/* starting blkno of this buf */
+	int64_t		blf_blkno;	/* starting blkno of this buf */
 	unsigned int	blf_map_size;	/* used size of data bitmap in words */
 	unsigned int	blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */
 } xfs_buf_log_format_t;
@@ -533,7 +533,7 @@ xfs_blft_to_flags(struct xfs_buf_log_format *blf, enum xfs_blft type)
 	blf->blf_flags |= ((type << XFS_BLFT_SHIFT) & XFS_BLFT_MASK);
 }
 
-static inline __uint16_t
+static inline uint16_t
 xfs_blft_from_flags(struct xfs_buf_log_format *blf)
 {
 	return (blf->blf_flags & XFS_BLFT_MASK) >> XFS_BLFT_SHIFT;
@@ -554,14 +554,14 @@ typedef struct xfs_extent {
  * conversion routine.
  */
 typedef struct xfs_extent_32 {
-	__uint64_t	ext_start;
-	__uint32_t	ext_len;
+	uint64_t	ext_start;
+	uint32_t	ext_len;
 } __attribute__((packed)) xfs_extent_32_t;
 
 typedef struct xfs_extent_64 {
-	__uint64_t	ext_start;
-	__uint32_t	ext_len;
-	__uint32_t	ext_pad;
+	uint64_t	ext_start;
+	uint32_t	ext_len;
+	uint32_t	ext_pad;
 } xfs_extent_64_t;
 
 /*
@@ -570,26 +570,26 @@ typedef struct xfs_extent_64 {
  * size is given by efi_nextents.
  */
 typedef struct xfs_efi_log_format {
-	__uint16_t		efi_type;	/* efi log item type */
-	__uint16_t		efi_size;	/* size of this item */
-	__uint32_t		efi_nextents;	/* # extents to free */
-	__uint64_t		efi_id;		/* efi identifier */
+	uint16_t		efi_type;	/* efi log item type */
+	uint16_t		efi_size;	/* size of this item */
+	uint32_t		efi_nextents;	/* # extents to free */
+	uint64_t		efi_id;		/* efi identifier */
 	xfs_extent_t		efi_extents[1];	/* array of extents to free */
 } xfs_efi_log_format_t;
 
 typedef struct xfs_efi_log_format_32 {
-	__uint16_t		efi_type;	/* efi log item type */
-	__uint16_t		efi_size;	/* size of this item */
-	__uint32_t		efi_nextents;	/* # extents to free */
-	__uint64_t		efi_id;		/* efi identifier */
+	uint16_t		efi_type;	/* efi log item type */
+	uint16_t		efi_size;	/* size of this item */
+	uint32_t		efi_nextents;	/* # extents to free */
+	uint64_t		efi_id;		/* efi identifier */
 	xfs_extent_32_t		efi_extents[1];	/* array of extents to free */
 } __attribute__((packed)) xfs_efi_log_format_32_t;
 
 typedef struct xfs_efi_log_format_64 {
-	__uint16_t		efi_type;	/* efi log item type */
-	__uint16_t		efi_size;	/* size of this item */
-	__uint32_t		efi_nextents;	/* # extents to free */
-	__uint64_t		efi_id;		/* efi identifier */
+	uint16_t		efi_type;	/* efi log item type */
+	uint16_t		efi_size;	/* size of this item */
+	uint32_t		efi_nextents;	/* # extents to free */
+	uint64_t		efi_id;		/* efi identifier */
 	xfs_extent_64_t		efi_extents[1];	/* array of extents to free */
 } xfs_efi_log_format_64_t;
 
@@ -599,26 +599,26 @@ typedef struct xfs_efi_log_format_64 {
  * size is given by efd_nextents;
  */
 typedef struct xfs_efd_log_format {
-	__uint16_t		efd_type;	/* efd log item type */
-	__uint16_t		efd_size;	/* size of this item */
-	__uint32_t		efd_nextents;	/* # of extents freed */
-	__uint64_t		efd_efi_id;	/* id of corresponding efi */
+	uint16_t		efd_type;	/* efd log item type */
+	uint16_t		efd_size;	/* size of this item */
+	uint32_t		efd_nextents;	/* # of extents freed */
+	uint64_t		efd_efi_id;	/* id of corresponding efi */
 	xfs_extent_t		efd_extents[1];	/* array of extents freed */
 } xfs_efd_log_format_t;
 
 typedef struct xfs_efd_log_format_32 {
-	__uint16_t		efd_type;	/* efd log item type */
-	__uint16_t		efd_size;	/* size of this item */
-	__uint32_t		efd_nextents;	/* # of extents freed */
-	__uint64_t		efd_efi_id;	/* id of corresponding efi */
+	uint16_t		efd_type;	/* efd log item type */
+	uint16_t		efd_size;	/* size of this item */
+	uint32_t		efd_nextents;	/* # of extents freed */
+	uint64_t		efd_efi_id;	/* id of corresponding efi */
 	xfs_extent_32_t		efd_extents[1];	/* array of extents freed */
 } __attribute__((packed)) xfs_efd_log_format_32_t;
 
 typedef struct xfs_efd_log_format_64 {
-	__uint16_t		efd_type;	/* efd log item type */
-	__uint16_t		efd_size;	/* size of this item */
-	__uint32_t		efd_nextents;	/* # of extents freed */
-	__uint64_t		efd_efi_id;	/* id of corresponding efi */
+	uint16_t		efd_type;	/* efd log item type */
+	uint16_t		efd_size;	/* size of this item */
+	uint32_t		efd_nextents;	/* # of extents freed */
+	uint64_t		efd_efi_id;	/* id of corresponding efi */
 	xfs_extent_64_t		efd_extents[1];	/* array of extents freed */
 } xfs_efd_log_format_64_t;
 
@@ -626,11 +626,11 @@ typedef struct xfs_efd_log_format_64 {
  * RUI/RUD (reverse mapping) log format definitions
  */
 struct xfs_map_extent {
-	__uint64_t		me_owner;
-	__uint64_t		me_startblock;
-	__uint64_t		me_startoff;
-	__uint32_t		me_len;
-	__uint32_t		me_flags;
+	uint64_t		me_owner;
+	uint64_t		me_startblock;
+	uint64_t		me_startoff;
+	uint32_t		me_len;
+	uint32_t		me_flags;
 };
 
 /* rmap me_flags: upper bits are flags, lower byte is type code */
@@ -659,10 +659,10 @@ struct xfs_map_extent {
  * size is given by rui_nextents.
  */
 struct xfs_rui_log_format {
-	__uint16_t		rui_type;	/* rui log item type */
-	__uint16_t		rui_size;	/* size of this item */
-	__uint32_t		rui_nextents;	/* # extents to free */
-	__uint64_t		rui_id;		/* rui identifier */
+	uint16_t		rui_type;	/* rui log item type */
+	uint16_t		rui_size;	/* size of this item */
+	uint32_t		rui_nextents;	/* # extents to free */
+	uint64_t		rui_id;		/* rui identifier */
 	struct xfs_map_extent	rui_extents[];	/* array of extents to rmap */
 };
 
@@ -680,19 +680,19 @@ xfs_rui_log_format_sizeof(
  * size is given by rud_nextents;
  */
 struct xfs_rud_log_format {
-	__uint16_t		rud_type;	/* rud log item type */
-	__uint16_t		rud_size;	/* size of this item */
-	__uint32_t		__pad;
-	__uint64_t		rud_rui_id;	/* id of corresponding rui */
+	uint16_t		rud_type;	/* rud log item type */
+	uint16_t		rud_size;	/* size of this item */
+	uint32_t		__pad;
+	uint64_t		rud_rui_id;	/* id of corresponding rui */
 };
 
 /*
  * CUI/CUD (refcount update) log format definitions
  */
 struct xfs_phys_extent {
-	__uint64_t		pe_startblock;
-	__uint32_t		pe_len;
-	__uint32_t		pe_flags;
+	uint64_t		pe_startblock;
+	uint32_t		pe_len;
+	uint32_t		pe_flags;
 };
 
 /* refcount pe_flags: upper bits are flags, lower byte is type code */
@@ -707,10 +707,10 @@ struct xfs_phys_extent {
  * size is given by cui_nextents.
  */
 struct xfs_cui_log_format {
-	__uint16_t		cui_type;	/* cui log item type */
-	__uint16_t		cui_size;	/* size of this item */
-	__uint32_t		cui_nextents;	/* # extents to free */
-	__uint64_t		cui_id;		/* cui identifier */
+	uint16_t		cui_type;	/* cui log item type */
+	uint16_t		cui_size;	/* size of this item */
+	uint32_t		cui_nextents;	/* # extents to free */
+	uint64_t		cui_id;		/* cui identifier */
 	struct xfs_phys_extent	cui_extents[];	/* array of extents */
 };
 
@@ -728,10 +728,10 @@ xfs_cui_log_format_sizeof(
  * size is given by cud_nextents;
  */
 struct xfs_cud_log_format {
-	__uint16_t		cud_type;	/* cud log item type */
-	__uint16_t		cud_size;	/* size of this item */
-	__uint32_t		__pad;
-	__uint64_t		cud_cui_id;	/* id of corresponding cui */
+	uint16_t		cud_type;	/* cud log item type */
+	uint16_t		cud_size;	/* size of this item */
+	uint32_t		__pad;
+	uint64_t		cud_cui_id;	/* id of corresponding cui */
 };
 
 /*
@@ -755,10 +755,10 @@ struct xfs_cud_log_format {
  * size is given by bui_nextents.
  */
 struct xfs_bui_log_format {
-	__uint16_t		bui_type;	/* bui log item type */
-	__uint16_t		bui_size;	/* size of this item */
-	__uint32_t		bui_nextents;	/* # extents to free */
-	__uint64_t		bui_id;		/* bui identifier */
+	uint16_t		bui_type;	/* bui log item type */
+	uint16_t		bui_size;	/* size of this item */
+	uint32_t		bui_nextents;	/* # extents to free */
+	uint64_t		bui_id;		/* bui identifier */
 	struct xfs_map_extent	bui_extents[];	/* array of extents to bmap */
 };
 
@@ -776,10 +776,10 @@ xfs_bui_log_format_sizeof(
  * size is given by bud_nextents;
  */
 struct xfs_bud_log_format {
-	__uint16_t		bud_type;	/* bud log item type */
-	__uint16_t		bud_size;	/* size of this item */
-	__uint32_t		__pad;
-	__uint64_t		bud_bui_id;	/* id of corresponding bui */
+	uint16_t		bud_type;	/* bud log item type */
+	uint16_t		bud_size;	/* size of this item */
+	uint32_t		__pad;
+	uint64_t		bud_bui_id;	/* id of corresponding bui */
 };
 
 /*
@@ -789,12 +789,12 @@ struct xfs_bud_log_format {
  * 32 bits : log_recovery code assumes that.
  */
 typedef struct xfs_dq_logformat {
-	__uint16_t		qlf_type;      /* dquot log item type */
-	__uint16_t		qlf_size;      /* size of this item */
+	uint16_t		qlf_type;      /* dquot log item type */
+	uint16_t		qlf_size;      /* size of this item */
 	xfs_dqid_t		qlf_id;	       /* usr/grp/proj id : 32 bits */
-	__int64_t		qlf_blkno;     /* blkno of dquot buffer */
-	__int32_t		qlf_len;       /* len of dquot buffer */
-	__uint32_t		qlf_boffset;   /* off of dquot in buffer */
+	int64_t			qlf_blkno;     /* blkno of dquot buffer */
+	int32_t			qlf_len;       /* len of dquot buffer */
+	uint32_t		qlf_boffset;   /* off of dquot in buffer */
 } xfs_dq_logformat_t;
 
 /*
@@ -853,8 +853,8 @@ typedef struct xfs_qoff_logformat {
  * decoding can be done correctly.
  */
 struct xfs_icreate_log {
-	__uint16_t	icl_type;	/* type of log format structure */
-	__uint16_t	icl_size;	/* size of log format structure */
+	uint16_t	icl_type;	/* type of log format structure */
+	uint16_t	icl_size;	/* size of log format structure */
 	__be32		icl_ag;		/* ag being allocated in */
 	__be32		icl_agbno;	/* start block of inode range */
 	__be32		icl_count;	/* number of inodes to initialise */

diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 29a01ec..66948a9 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h

@@ -26,7 +26,7 @@
 #define XLOG_RHASH_SIZE	16
 #define XLOG_RHASH_SHIFT 2
 #define XLOG_RHASH(tid)	\
-	((((__uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1))
+	((((uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1))
 
 #define XLOG_MAX_REGIONS_IN_ITEM   (XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK / 2 + 1)
 

diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index 8eed512..2834574 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h

@@ -27,8 +27,8 @@
  * they may need 64-bit accounting. Hence, 64-bit quota-counters,
  * and quota-limits. This is a waste in the common case, but hey ...
  */
-typedef __uint64_t	xfs_qcnt_t;
-typedef __uint16_t	xfs_qwarncnt_t;
+typedef uint64_t	xfs_qcnt_t;
+typedef uint16_t	xfs_qwarncnt_t;
 
 /*
  * flags for q_flags field in the dquot.
@@ -136,6 +136,8 @@ typedef __uint16_t	xfs_qwarncnt_t;
  */
 #define XFS_QMOPT_INHERIT	0x1000000
 
+#define XFS_QMOPT_NOLOCK	0x2000000 /* don't ilock during dqget */
+
 /*
  * flags to xfs_trans_mod_dquot.
  */

diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 82a38d8..900ea23 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c

@@ -784,14 +784,6 @@ xfs_refcount_merge_extents(
 }
 
 /*
- * While we're adjusting the refcounts records of an extent, we have
- * to keep an eye on the number of extents we're dirtying -- run too
- * many in a single transaction and we'll exceed the transaction's
- * reservation and crash the fs.  Each record adds 12 bytes to the
- * log (plus any key updates) so we'll conservatively assume 24 bytes
- * per record.  We must also leave space for btree splits on both ends
- * of the range and space for the CUD and a new CUI.
- *
  * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
  * true incorrectly is a shutdown FS; the penalty for guessing false
  * incorrectly is more transaction rolls than might be necessary.
@@ -813,8 +805,7 @@ xfs_refcount_still_have_space(
 	 */
 	if (cur->bc_private.a.priv.refc.nr_ops > 2 &&
 	    XFS_TEST_ERROR(false, cur->bc_mp,
-			XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE,
-			XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE))
+			XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
 		return false;
 
 	if (cur->bc_private.a.priv.refc.nr_ops == 0)
@@ -822,7 +813,7 @@ xfs_refcount_still_have_space(
 	else if (overhead > cur->bc_tp->t_log_res)
 		return false;
 	return  cur->bc_tp->t_log_res - overhead >
-		cur->bc_private.a.priv.refc.nr_ops * 32;
+		cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
 }
 
 /*
@@ -1076,8 +1067,7 @@ xfs_refcount_finish_one(
 			blockcount);
 
 	if (XFS_TEST_ERROR(false, mp,
-			XFS_ERRTAG_REFCOUNT_FINISH_ONE,
-			XFS_RANDOM_REFCOUNT_FINISH_ONE))
+			XFS_ERRTAG_REFCOUNT_FINISH_ONE))
 		return -EIO;
 
 	/*

diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 098dc668..eafb9d1 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h

@@ -67,4 +67,20 @@ extern int xfs_refcount_free_cow_extent(struct xfs_mount *mp,
 extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
 		xfs_agnumber_t agno);
 
+/*
+ * While we're adjusting the refcount records of an extent, we have
+ * to keep an eye on the number of extents we're dirtying -- run too
+ * many in a single transaction and we'll exceed the transaction's
+ * reservation and crash the fs.  Each record adds 12 bytes to the
+ * log (plus any key updates) so we'll conservatively assume 32 bytes
+ * per record.  We must also leave space for btree splits on both ends
+ * of the range and space for the CUD and a new CUI.
+ */
+#define XFS_REFCOUNT_ITEM_OVERHEAD	32
+
+static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res)
+{
+	return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD;
+}
+
 #endif	/* __XFS_REFCOUNT_H__ */

diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 50add52..3c59dd3 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c

@@ -202,7 +202,7 @@ xfs_refcountbt_init_ptr_from_cur(
 	ptr->s = agf->agf_refcount_root;
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_refcountbt_key_diff(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*key)
@@ -210,16 +210,16 @@ xfs_refcountbt_key_diff(
 	struct xfs_refcount_irec	*rec = &cur->bc_rec.rc;
 	struct xfs_refcount_key		*kp = &key->refc;
 
-	return (__int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock;
+	return (int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock;
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_refcountbt_diff_two_keys(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*k1,
 	union xfs_btree_key	*k2)
 {
-	return (__int64_t)be32_to_cpu(k1->refc.rc_startblock) -
+	return (int64_t)be32_to_cpu(k1->refc.rc_startblock) -
 			  be32_to_cpu(k2->refc.rc_startblock);
 }
 
@@ -285,7 +285,6 @@ const struct xfs_buf_ops xfs_refcountbt_buf_ops = {
 	.verify_write		= xfs_refcountbt_write_verify,
 };
 
-#if defined(DEBUG) || defined(XFS_WARN)
 STATIC int
 xfs_refcountbt_keys_inorder(
 	struct xfs_btree_cur	*cur,
@@ -306,7 +305,6 @@ xfs_refcountbt_recs_inorder(
 		be32_to_cpu(r1->refc.rc_blockcount) <=
 		be32_to_cpu(r2->refc.rc_startblock);
 }
-#endif
 
 static const struct xfs_btree_ops xfs_refcountbt_ops = {
 	.rec_len		= sizeof(struct xfs_refcount_rec),
@@ -325,10 +323,8 @@ static const struct xfs_btree_ops xfs_refcountbt_ops = {
 	.key_diff		= xfs_refcountbt_key_diff,
 	.buf_ops		= &xfs_refcountbt_buf_ops,
 	.diff_two_keys		= xfs_refcountbt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
 	.keys_inorder		= xfs_refcountbt_keys_inorder,
 	.recs_inorder		= xfs_refcountbt_recs_inorder,
-#endif
 };
 
 /*

diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 06cfb93..55c88a7 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c

@@ -179,7 +179,8 @@ xfs_rmap_delete(
 	return error;
 }
 
-static int
+/* Convert an internal btree record to an rmap record. */
+int
 xfs_rmap_btrec_to_irec(
 	union xfs_btree_rec	*rec,
 	struct xfs_rmap_irec	*irec)
@@ -2061,7 +2062,7 @@ int
 xfs_rmap_finish_one(
 	struct xfs_trans		*tp,
 	enum xfs_rmap_intent_type	type,
-	__uint64_t			owner,
+	uint64_t			owner,
 	int				whichfork,
 	xfs_fileoff_t			startoff,
 	xfs_fsblock_t			startblock,
@@ -2086,8 +2087,7 @@ xfs_rmap_finish_one(
 			startoff, blockcount, state);
 
 	if (XFS_TEST_ERROR(false, mp,
-			XFS_ERRTAG_RMAP_FINISH_ONE,
-			XFS_RANDOM_RMAP_FINISH_ONE))
+			XFS_ERRTAG_RMAP_FINISH_ONE))
 		return -EIO;
 
 	/*
@@ -2182,7 +2182,7 @@ __xfs_rmap_add(
 	struct xfs_mount		*mp,
 	struct xfs_defer_ops		*dfops,
 	enum xfs_rmap_intent_type	type,
-	__uint64_t			owner,
+	uint64_t			owner,
 	int				whichfork,
 	struct xfs_bmbt_irec		*bmap)
 {
@@ -2266,7 +2266,7 @@ xfs_rmap_alloc_extent(
 	xfs_agnumber_t		agno,
 	xfs_agblock_t		bno,
 	xfs_extlen_t		len,
-	__uint64_t		owner)
+	uint64_t		owner)
 {
 	struct xfs_bmbt_irec	bmap;
 
@@ -2290,7 +2290,7 @@ xfs_rmap_free_extent(
 	xfs_agnumber_t		agno,
 	xfs_agblock_t		bno,
 	xfs_extlen_t		len,
-	__uint64_t		owner)
+	uint64_t		owner)
 {
 	struct xfs_bmbt_irec	bmap;
 

diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 98f908f..466ede6 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h

@@ -179,7 +179,7 @@ enum xfs_rmap_intent_type {
 struct xfs_rmap_intent {
 	struct list_head			ri_list;
 	enum xfs_rmap_intent_type		ri_type;
-	__uint64_t				ri_owner;
+	uint64_t				ri_owner;
 	int					ri_whichfork;
 	struct xfs_bmbt_irec			ri_bmap;
 };
@@ -196,15 +196,15 @@ int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
 		struct xfs_bmbt_irec *imap);
 int xfs_rmap_alloc_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
 		xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
-		__uint64_t owner);
+		uint64_t owner);
 int xfs_rmap_free_extent(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
 		xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
-		__uint64_t owner);
+		uint64_t owner);
 
 void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp,
 		struct xfs_btree_cur *rcur, int error);
 int xfs_rmap_finish_one(struct xfs_trans *tp, enum xfs_rmap_intent_type type,
-		__uint64_t owner, int whichfork, xfs_fileoff_t startoff,
+		uint64_t owner, int whichfork, xfs_fileoff_t startoff,
 		xfs_fsblock_t startblock, xfs_filblks_t blockcount,
 		xfs_exntst_t state, struct xfs_btree_cur **pcur);
 
@@ -216,5 +216,8 @@ int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno,
 		struct xfs_rmap_irec *irec, int	*stat);
 int xfs_rmap_compare(const struct xfs_rmap_irec *a,
 		const struct xfs_rmap_irec *b);
+union xfs_btree_rec;
+int xfs_rmap_btrec_to_irec(union xfs_btree_rec *rec,
+		struct xfs_rmap_irec *irec);
 
 #endif	/* __XFS_RMAP_H__ */

diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 74e5a54..9d9c919 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c

@@ -199,7 +199,7 @@ xfs_rmapbt_init_high_key_from_rec(
 	union xfs_btree_key	*key,
 	union xfs_btree_rec	*rec)
 {
-	__uint64_t		off;
+	uint64_t		off;
 	int			adj;
 
 	adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;
@@ -241,7 +241,7 @@ xfs_rmapbt_init_ptr_from_cur(
 	ptr->s = agf->agf_roots[cur->bc_btnum];
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_rmapbt_key_diff(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*key)
@@ -249,9 +249,9 @@ xfs_rmapbt_key_diff(
 	struct xfs_rmap_irec	*rec = &cur->bc_rec.r;
 	struct xfs_rmap_key	*kp = &key->rmap;
 	__u64			x, y;
-	__int64_t		d;
+	int64_t			d;
 
-	d = (__int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
+	d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
 	if (d)
 		return d;
 
@@ -271,7 +271,7 @@ xfs_rmapbt_key_diff(
 	return 0;
 }
 
-STATIC __int64_t
+STATIC int64_t
 xfs_rmapbt_diff_two_keys(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*k1,
@@ -279,10 +279,10 @@ xfs_rmapbt_diff_two_keys(
 {
 	struct xfs_rmap_key	*kp1 = &k1->rmap;
 	struct xfs_rmap_key	*kp2 = &k2->rmap;
-	__int64_t		d;
+	int64_t			d;
 	__u64			x, y;
 
-	d = (__int64_t)be32_to_cpu(kp1->rm_startblock) -
+	d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
 		       be32_to_cpu(kp2->rm_startblock);
 	if (d)
 		return d;
@@ -377,17 +377,16 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
 	.verify_write		= xfs_rmapbt_write_verify,
 };
 
-#if defined(DEBUG) || defined(XFS_WARN)
 STATIC int
 xfs_rmapbt_keys_inorder(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_key	*k1,
 	union xfs_btree_key	*k2)
 {
-	__uint32_t		x;
-	__uint32_t		y;
-	__uint64_t		a;
-	__uint64_t		b;
+	uint32_t		x;
+	uint32_t		y;
+	uint64_t		a;
+	uint64_t		b;
 
 	x = be32_to_cpu(k1->rmap.rm_startblock);
 	y = be32_to_cpu(k2->rmap.rm_startblock);
@@ -414,10 +413,10 @@ xfs_rmapbt_recs_inorder(
 	union xfs_btree_rec	*r1,
 	union xfs_btree_rec	*r2)
 {
-	__uint32_t		x;
-	__uint32_t		y;
-	__uint64_t		a;
-	__uint64_t		b;
+	uint32_t		x;
+	uint32_t		y;
+	uint64_t		a;
+	uint64_t		b;
 
 	x = be32_to_cpu(r1->rmap.rm_startblock);
 	y = be32_to_cpu(r2->rmap.rm_startblock);
@@ -437,7 +436,6 @@ xfs_rmapbt_recs_inorder(
 		return 1;
 	return 0;
 }
-#endif	/* DEBUG */
 
 static const struct xfs_btree_ops xfs_rmapbt_ops = {
 	.rec_len		= sizeof(struct xfs_rmap_rec),
@@ -456,10 +454,8 @@ static const struct xfs_btree_ops xfs_rmapbt_ops = {
 	.key_diff		= xfs_rmapbt_key_diff,
 	.buf_ops		= &xfs_rmapbt_buf_ops,
 	.diff_two_keys		= xfs_rmapbt_diff_two_keys,
-#if defined(DEBUG) || defined(XFS_WARN)
 	.keys_inorder		= xfs_rmapbt_keys_inorder,
 	.recs_inorder		= xfs_rmapbt_recs_inorder,
-#endif
 };
 
 /*

diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index e47b99e..5d4e43e 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c

@@ -70,7 +70,7 @@ const struct xfs_buf_ops xfs_rtbuf_ops = {
  * Get a buffer for the bitmap or summary file block specified.
  * The buffer is returned read and locked.
  */
-static int
+int
 xfs_rtbuf_get(
 	xfs_mount_t	*mp,		/* file system mount structure */
 	xfs_trans_t	*tp,		/* transaction pointer */
@@ -1011,7 +1011,7 @@ xfs_rtfree_extent(
 	    mp->m_sb.sb_rextents) {
 		if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
 			mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
-		*(__uint64_t *)&VFS_I(mp->m_rbmip)->i_atime = 0;
+		*(uint64_t *)&VFS_I(mp->m_rbmip)->i_atime = 0;
 		xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
 	}
 	return 0;

diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 584ec89..9b5aae2 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c

@@ -448,7 +448,7 @@ xfs_sb_quota_to_disk(
 	struct xfs_dsb	*to,
 	struct xfs_sb	*from)
 {
-	__uint16_t	qflags = from->sb_qflags;
+	uint16_t	qflags = from->sb_qflags;
 
 	to->sb_uquotino = cpu_to_be64(from->sb_uquotino);
 	if (xfs_sb_version_has_pquotino(from)) {
@@ -756,7 +756,7 @@ xfs_sb_mount_common(
 	mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2;
 
 	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
-	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
+	mp->m_ialloc_inos = (int)MAX((uint16_t)XFS_INODES_PER_CHUNK,
 					sbp->sb_inopblock);
 	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
 

diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index 2e2c671..c484877 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c

@@ -114,7 +114,7 @@ xfs_symlink_verify(
 	if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
 		return false;
 	if (be32_to_cpu(dsl->sl_offset) +
-				be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN)
+				be32_to_cpu(dsl->sl_bytes) >= XFS_SYMLINK_MAXLEN)
 		return false;
 	if (dsl->sl_owner == 0)
 		return false;

diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index b456cca..6bd916b 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c

@@ -477,14 +477,14 @@ xfs_calc_mkdir_reservation(
 /*
  * Making a new symplink is the same as creating a new file, but
  * with the added blocks for remote symlink data which can be up to 1kB in
- * length (MAXPATHLEN).
+ * length (XFS_SYMLINK_MAXLEN).
  */
 STATIC uint
 xfs_calc_symlink_reservation(
 	struct xfs_mount	*mp)
 {
 	return xfs_calc_create_reservation(mp) +
-	       xfs_calc_buf_res(1, MAXPATHLEN);
+	       xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN);
 }
 
 /*

diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 717909f..0220159 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h

@@ -18,34 +18,34 @@
 #ifndef __XFS_TYPES_H__
 #define	__XFS_TYPES_H__
 
-typedef __uint32_t	prid_t;		/* project ID */
+typedef uint32_t	prid_t;		/* project ID */
 
-typedef __uint32_t	xfs_agblock_t;	/* blockno in alloc. group */
-typedef	__uint32_t	xfs_agino_t;	/* inode # within allocation grp */
-typedef	__uint32_t	xfs_extlen_t;	/* extent length in blocks */
-typedef	__uint32_t	xfs_agnumber_t;	/* allocation group number */
-typedef __int32_t	xfs_extnum_t;	/* # of extents in a file */
-typedef __int16_t	xfs_aextnum_t;	/* # extents in an attribute fork */
-typedef	__int64_t	xfs_fsize_t;	/* bytes in a file */
-typedef __uint64_t	xfs_ufsize_t;	/* unsigned bytes in a file */
+typedef uint32_t	xfs_agblock_t;	/* blockno in alloc. group */
+typedef uint32_t	xfs_agino_t;	/* inode # within allocation grp */
+typedef uint32_t	xfs_extlen_t;	/* extent length in blocks */
+typedef uint32_t	xfs_agnumber_t;	/* allocation group number */
+typedef int32_t		xfs_extnum_t;	/* # of extents in a file */
+typedef int16_t		xfs_aextnum_t;	/* # extents in an attribute fork */
+typedef int64_t		xfs_fsize_t;	/* bytes in a file */
+typedef uint64_t	xfs_ufsize_t;	/* unsigned bytes in a file */
 
-typedef	__int32_t	xfs_suminfo_t;	/* type of bitmap summary info */
-typedef	__int32_t	xfs_rtword_t;	/* word type for bitmap manipulations */
+typedef int32_t		xfs_suminfo_t;	/* type of bitmap summary info */
+typedef int32_t		xfs_rtword_t;	/* word type for bitmap manipulations */
 
-typedef	__int64_t	xfs_lsn_t;	/* log sequence number */
-typedef	__int32_t	xfs_tid_t;	/* transaction identifier */
+typedef int64_t		xfs_lsn_t;	/* log sequence number */
+typedef int32_t		xfs_tid_t;	/* transaction identifier */
 
-typedef	__uint32_t	xfs_dablk_t;	/* dir/attr block number (in file) */
-typedef	__uint32_t	xfs_dahash_t;	/* dir/attr hash value */
+typedef uint32_t	xfs_dablk_t;	/* dir/attr block number (in file) */
+typedef uint32_t	xfs_dahash_t;	/* dir/attr hash value */
 
-typedef	__uint64_t	xfs_fsblock_t;	/* blockno in filesystem (agno|agbno) */
-typedef __uint64_t	xfs_rfsblock_t;	/* blockno in filesystem (raw) */
-typedef __uint64_t	xfs_rtblock_t;	/* extent (block) in realtime area */
-typedef __uint64_t	xfs_fileoff_t;	/* block number in a file */
-typedef __uint64_t	xfs_filblks_t;	/* number of blocks in a file */
+typedef uint64_t	xfs_fsblock_t;	/* blockno in filesystem (agno|agbno) */
+typedef uint64_t	xfs_rfsblock_t;	/* blockno in filesystem (raw) */
+typedef uint64_t	xfs_rtblock_t;	/* extent (block) in realtime area */
+typedef uint64_t	xfs_fileoff_t;	/* block number in a file */
+typedef uint64_t	xfs_filblks_t;	/* number of blocks in a file */
 
-typedef	__int64_t	xfs_srtblock_t;	/* signed version of xfs_rtblock_t */
-typedef __int64_t	xfs_sfiloff_t;	/* signed block number in a file */
+typedef int64_t		xfs_srtblock_t;	/* signed version of xfs_rtblock_t */
+typedef int64_t		xfs_sfiloff_t;	/* signed block number in a file */
 
 /*
  * Null values for the types.
@@ -125,7 +125,7 @@ struct xfs_name {
  * uid_t and gid_t are hard-coded to 32 bits in the inode.
  * Hence, an 'id' in a dquot is 32 bits..
  */
-typedef __uint32_t	xfs_dqid_t;
+typedef uint32_t	xfs_dqid_t;
 
 /*
  * Constants for bit manipulations.

diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index a742c47..80cd0fd8 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h

@@ -24,6 +24,10 @@
 #define XFS_BUF_LOCK_TRACKING 1
 #endif
 
+#ifdef CONFIG_XFS_ASSERT_FATAL
+#define XFS_ASSERT_FATAL 1
+#endif
+
 #ifdef CONFIG_XFS_WARN
 #define XFS_WARN 1
 #endif

diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index b468e04..7034e17 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c

@@ -170,8 +170,8 @@ xfs_get_acl(struct inode *inode, int type)
 	return acl;
 }
 
-STATIC int
-__xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+int
+__xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 {
 	struct xfs_inode *ip = XFS_I(inode);
 	unsigned char *ea_name;
@@ -268,5 +268,5 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	}
 
  set_acl:
-	return __xfs_set_acl(inode, type, acl);
+	return __xfs_set_acl(inode, acl, type);
 }

diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 286fa89..0432731 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h

@@ -24,6 +24,7 @@ struct posix_acl;
 #ifdef CONFIG_XFS_POSIX_ACL
 extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
 extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
 #else
 static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type)
 {

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index d20c29b..6bf120b 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c

@@ -839,7 +839,7 @@ xfs_writepage_map(
 	struct inode		*inode,
 	struct page		*page,
 	loff_t			offset,
-	__uint64_t              end_offset)
+	uint64_t              end_offset)
 {
 	LIST_HEAD(submit_list);
 	struct xfs_ioend	*ioend, *next;
@@ -994,7 +994,7 @@ xfs_do_writepage(
 	struct xfs_writepage_ctx *wpc = data;
 	struct inode		*inode = page->mapping->host;
 	loff_t			offset;
-	__uint64_t              end_offset;
+	uint64_t              end_offset;
 	pgoff_t                 end_index;
 
 	trace_xfs_writepage(inode, page, 0, 0);

diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index d14691a..5d5a5e2 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h

@@ -117,6 +117,7 @@ typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int,
 			      unsigned char *, int, int);
 
 typedef struct xfs_attr_list_context {
+	struct xfs_trans		*tp;
 	struct xfs_inode		*dp;		/* inode */
 	struct attrlist_cursor_kern	*cursor;	/* position in list */
 	char				*alist;		/* output buffer */
@@ -140,8 +141,10 @@ typedef struct xfs_attr_list_context {
  * Overall external interface routines.
  */
 int xfs_attr_inactive(struct xfs_inode *dp);
+int xfs_attr_list_int_ilocked(struct xfs_attr_list_context *);
 int xfs_attr_list_int(struct xfs_attr_list_context *);
 int xfs_inode_hasattr(struct xfs_inode *ip);
+int xfs_attr_get_ilocked(struct xfs_inode *ip, struct xfs_da_args *args);
 int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
 		 unsigned char *value, int *valuelenp, int flags);
 int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,

diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 97c45b6..545eca50 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c

@@ -230,7 +230,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 	 */
 	bp = NULL;
 	if (cursor->blkno > 0) {
-		error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1,
+		error = xfs_da3_node_read(context->tp, dp, cursor->blkno, -1,
 					      &bp, XFS_ATTR_FORK);
 		if ((error != 0) && (error != -EFSCORRUPTED))
 			return error;
@@ -242,7 +242,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 			case XFS_DA_NODE_MAGIC:
 			case XFS_DA3_NODE_MAGIC:
 				trace_xfs_attr_list_wrong_blk(context);
-				xfs_trans_brelse(NULL, bp);
+				xfs_trans_brelse(context->tp, bp);
 				bp = NULL;
 				break;
 			case XFS_ATTR_LEAF_MAGIC:
@@ -254,18 +254,18 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 				if (cursor->hashval > be32_to_cpu(
 						entries[leafhdr.count - 1].hashval)) {
 					trace_xfs_attr_list_wrong_blk(context);
-					xfs_trans_brelse(NULL, bp);
+					xfs_trans_brelse(context->tp, bp);
 					bp = NULL;
 				} else if (cursor->hashval <= be32_to_cpu(
 						entries[0].hashval)) {
 					trace_xfs_attr_list_wrong_blk(context);
-					xfs_trans_brelse(NULL, bp);
+					xfs_trans_brelse(context->tp, bp);
 					bp = NULL;
 				}
 				break;
 			default:
 				trace_xfs_attr_list_wrong_blk(context);
-				xfs_trans_brelse(NULL, bp);
+				xfs_trans_brelse(context->tp, bp);
 				bp = NULL;
 			}
 		}
@@ -279,9 +279,9 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 	if (bp == NULL) {
 		cursor->blkno = 0;
 		for (;;) {
-			__uint16_t magic;
+			uint16_t magic;
 
-			error = xfs_da3_node_read(NULL, dp,
+			error = xfs_da3_node_read(context->tp, dp,
 						      cursor->blkno, -1, &bp,
 						      XFS_ATTR_FORK);
 			if (error)
@@ -297,7 +297,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 						     XFS_ERRLEVEL_LOW,
 						     context->dp->i_mount,
 						     node);
-				xfs_trans_brelse(NULL, bp);
+				xfs_trans_brelse(context->tp, bp);
 				return -EFSCORRUPTED;
 			}
 
@@ -313,10 +313,10 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 				}
 			}
 			if (i == nodehdr.count) {
-				xfs_trans_brelse(NULL, bp);
+				xfs_trans_brelse(context->tp, bp);
 				return 0;
 			}
-			xfs_trans_brelse(NULL, bp);
+			xfs_trans_brelse(context->tp, bp);
 		}
 	}
 	ASSERT(bp != NULL);
@@ -333,12 +333,12 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
 		if (context->seen_enough || leafhdr.forw == 0)
 			break;
 		cursor->blkno = leafhdr.forw;
-		xfs_trans_brelse(NULL, bp);
-		error = xfs_attr3_leaf_read(NULL, dp, cursor->blkno, -1, &bp);
+		xfs_trans_brelse(context->tp, bp);
+		error = xfs_attr3_leaf_read(context->tp, dp, cursor->blkno, -1, &bp);
 		if (error)
 			return error;
 	}
-	xfs_trans_brelse(NULL, bp);
+	xfs_trans_brelse(context->tp, bp);
 	return 0;
 }
 
@@ -448,16 +448,34 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
 	trace_xfs_attr_leaf_list(context);
 
 	context->cursor->blkno = 0;
-	error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp);
+	error = xfs_attr3_leaf_read(context->tp, context->dp, 0, -1, &bp);
 	if (error)
 		return error;
 
 	xfs_attr3_leaf_list_int(bp, context);
-	xfs_trans_brelse(NULL, bp);
+	xfs_trans_brelse(context->tp, bp);
 	return 0;
 }
 
 int
+xfs_attr_list_int_ilocked(
+	struct xfs_attr_list_context	*context)
+{
+	struct xfs_inode		*dp = context->dp;
+
+	/*
+	 * Decide on what work routines to call based on the inode size.
+	 */
+	if (!xfs_inode_hasattr(dp))
+		return 0;
+	else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
+		return xfs_attr_shortform_list(context);
+	else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
+		return xfs_attr_leaf_list(context);
+	return xfs_attr_node_list(context);
+}
+
+int
 xfs_attr_list_int(
 	xfs_attr_list_context_t *context)
 {
@@ -470,19 +488,8 @@ xfs_attr_list_int(
 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
 		return -EIO;
 
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
 	lock_mode = xfs_ilock_attr_map_shared(dp);
-	if (!xfs_inode_hasattr(dp)) {
-		error = 0;
-	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-		error = xfs_attr_shortform_list(context);
-	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
-		error = xfs_attr_leaf_list(context);
-	} else {
-		error = xfs_attr_node_list(context);
-	}
+	error = xfs_attr_list_int_ilocked(context);
 	xfs_iunlock(dp, lock_mode);
 	return error;
 }

diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index d419d23..8807391 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c

@@ -396,6 +396,7 @@ xfs_bui_recover(
 	struct xfs_map_extent		*bmap;
 	xfs_fsblock_t			startblock_fsb;
 	xfs_fsblock_t			inode_fsb;
+	xfs_filblks_t			count;
 	bool				op_ok;
 	struct xfs_bud_log_item		*budp;
 	enum xfs_bmap_intent_type	type;
@@ -404,6 +405,7 @@ xfs_bui_recover(
 	struct xfs_trans		*tp;
 	struct xfs_inode		*ip = NULL;
 	struct xfs_defer_ops		dfops;
+	struct xfs_bmbt_irec		irec;
 	xfs_fsblock_t			firstfsb;
 
 	ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags));
@@ -481,13 +483,24 @@ xfs_bui_recover(
 	}
 	xfs_trans_ijoin(tp, ip, 0);
 
+	count = bmap->me_len;
 	error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type,
 			ip, whichfork, bmap->me_startoff,
-			bmap->me_startblock, bmap->me_len,
-			state);
+			bmap->me_startblock, &count, state);
 	if (error)
 		goto err_dfops;
 
+	if (count > 0) {
+		ASSERT(type == XFS_BMAP_UNMAP);
+		irec.br_startblock = bmap->me_startblock;
+		irec.br_blockcount = count;
+		irec.br_startoff = bmap->me_startoff;
+		irec.br_state = state;
+		error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec);
+		if (error)
+			goto err_dfops;
+	}
+
 	/* Finish transaction, free inodes. */
 	error = xfs_defer_finish(&tp, &dfops, NULL);
 	if (error)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 9e3cc21..93e9552 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c

@@ -219,20 +219,24 @@ xfs_bmap_eof(
  */
 
 /*
- * Count leaf blocks given a range of extent records.
+ * Count leaf blocks given a range of extent records.  Delayed allocation
+ * extents are not counted towards the totals.
  */
 STATIC void
 xfs_bmap_count_leaves(
-	xfs_ifork_t		*ifp,
-	xfs_extnum_t		idx,
-	int			numrecs,
-	int			*count)
+	struct xfs_ifork	*ifp,
+	xfs_extnum_t		*numrecs,
+	xfs_filblks_t		*count)
 {
-	int		b;
+	xfs_extnum_t		i;
+	xfs_extnum_t		nr_exts = xfs_iext_count(ifp);
 
-	for (b = 0; b < numrecs; b++) {
-		xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
-		*count += xfs_bmbt_get_blockcount(frp);
+	for (i = 0; i < nr_exts; i++) {
+		xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, i);
+		if (!isnullstartblock(xfs_bmbt_get_startblock(frp))) {
+			(*numrecs)++;
+			*count += xfs_bmbt_get_blockcount(frp);
+		}
 	}
 }
 
@@ -245,7 +249,7 @@ xfs_bmap_disk_count_leaves(
 	struct xfs_mount	*mp,
 	struct xfs_btree_block	*block,
 	int			numrecs,
-	int			*count)
+	xfs_filblks_t		*count)
 {
 	int		b;
 	xfs_bmbt_rec_t	*frp;
@@ -260,17 +264,18 @@ xfs_bmap_disk_count_leaves(
  * Recursively walks each level of a btree
  * to count total fsblocks in use.
  */
-STATIC int                                     /* error */
+STATIC int
 xfs_bmap_count_tree(
-	xfs_mount_t     *mp,            /* file system mount point */
-	xfs_trans_t     *tp,            /* transaction pointer */
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_fsblock_t   blockno,	/* file system block number */
-	int             levelin,	/* level in btree */
-	int		*count)		/* Count of blocks */
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	struct xfs_ifork	*ifp,
+	xfs_fsblock_t		blockno,
+	int			levelin,
+	xfs_extnum_t		*nextents,
+	xfs_filblks_t		*count)
 {
 	int			error;
-	xfs_buf_t		*bp, *nbp;
+	struct xfs_buf		*bp, *nbp;
 	int			level = levelin;
 	__be64			*pp;
 	xfs_fsblock_t           bno = blockno;
@@ -303,8 +308,9 @@ xfs_bmap_count_tree(
 		/* Dive to the next level */
 		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 		bno = be64_to_cpu(*pp);
-		if (unlikely((error =
-		     xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
+		error = xfs_bmap_count_tree(mp, tp, ifp, bno, level, nextents,
+				count);
+		if (error) {
 			xfs_trans_brelse(tp, bp);
 			XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
 					 XFS_ERRLEVEL_LOW, mp);
@@ -316,6 +322,7 @@ xfs_bmap_count_tree(
 		for (;;) {
 			nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
 			numrecs = be16_to_cpu(block->bb_numrecs);
+			(*nextents) += numrecs;
 			xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
 			xfs_trans_brelse(tp, bp);
 			if (nextbno == NULLFSBLOCK)
@@ -334,46 +341,64 @@ xfs_bmap_count_tree(
 }
 
 /*
- * Count fsblocks of the given fork.
+ * Count fsblocks of the given fork.  Delayed allocation extents are
+ * not counted towards the totals.
  */
-static int					/* error */
+int
 xfs_bmap_count_blocks(
-	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_inode_t		*ip,		/* incore inode */
-	int			whichfork,	/* data or attr fork */
-	int			*count)		/* out: count of blocks */
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip,
+	int			whichfork,
+	xfs_extnum_t		*nextents,
+	xfs_filblks_t		*count)
 {
-	struct xfs_btree_block	*block;	/* current btree block */
-	xfs_fsblock_t		bno;	/* block # of "block" */
-	xfs_ifork_t		*ifp;	/* fork structure */
-	int			level;	/* btree level, for checking */
-	xfs_mount_t		*mp;	/* file system mount structure */
+	struct xfs_mount	*mp;	/* file system mount structure */
 	__be64			*pp;	/* pointer to block address */
+	struct xfs_btree_block	*block;	/* current btree block */
+	struct xfs_ifork	*ifp;	/* fork structure */
+	xfs_fsblock_t		bno;	/* block # of "block" */
+	int			level;	/* btree level, for checking */
+	int			error;
 
 	bno = NULLFSBLOCK;
 	mp = ip->i_mount;
+	*nextents = 0;
+	*count = 0;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
-	if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
-		xfs_bmap_count_leaves(ifp, 0, xfs_iext_count(ifp), count);
+	if (!ifp)
 		return 0;
-	}
 
-	/*
-	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
-	 */
-	block = ifp->if_broot;
-	level = be16_to_cpu(block->bb_level);
-	ASSERT(level > 0);
-	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
-	bno = be64_to_cpu(*pp);
-	ASSERT(bno != NULLFSBLOCK);
-	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
-	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
+	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+	case XFS_DINODE_FMT_EXTENTS:
+		xfs_bmap_count_leaves(ifp, nextents, count);
+		return 0;
+	case XFS_DINODE_FMT_BTREE:
+		if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+			error = xfs_iread_extents(tp, ip, whichfork);
+			if (error)
+				return error;
+		}
 
-	if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
-		XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
-				 mp);
-		return -EFSCORRUPTED;
+		/*
+		 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
+		 */
+		block = ifp->if_broot;
+		level = be16_to_cpu(block->bb_level);
+		ASSERT(level > 0);
+		pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
+		bno = be64_to_cpu(*pp);
+		ASSERT(bno != NULLFSBLOCK);
+		ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
+		ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
+
+		error = xfs_bmap_count_tree(mp, tp, ifp, bno, level,
+				nextents, count);
+		if (error) {
+			XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)",
+					XFS_ERRLEVEL_LOW, mp);
+			return -EFSCORRUPTED;
+		}
+		return 0;
 	}
 
 	return 0;
@@ -389,11 +414,11 @@ xfs_getbmapx_fix_eof_hole(
 	struct getbmapx		*out,		/* output structure */
 	int			prealloced,	/* this is a file with
 						 * preallocated data space */
-	__int64_t		end,		/* last block requested */
+	int64_t			end,		/* last block requested */
 	xfs_fsblock_t		startblock,
 	bool			moretocome)
 {
-	__int64_t		fixlen;
+	int64_t			fixlen;
 	xfs_mount_t		*mp;		/* file system mount point */
 	xfs_ifork_t		*ifp;		/* inode fork pointer */
 	xfs_extnum_t		lastx;		/* last extent pointer */
@@ -455,8 +480,8 @@ xfs_getbmap_adjust_shared(
 
 	agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
 	agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
-	error = xfs_reflink_find_shared(mp, agno, agbno, map->br_blockcount,
-			&ebno, &elen, true);
+	error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
+			map->br_blockcount, &ebno, &elen, true);
 	if (error)
 		return error;
 
@@ -514,9 +539,9 @@ xfs_getbmap(
 	xfs_bmap_format_t	formatter,	/* format to user */
 	void			*arg)		/* formatter arg */
 {
-	__int64_t		bmvend;		/* last block requested */
+	int64_t			bmvend;		/* last block requested */
 	int			error = 0;	/* return value */
-	__int64_t		fixlen;		/* length for -1 case */
+	int64_t			fixlen;		/* length for -1 case */
 	int			i;		/* extent number */
 	int			lock;		/* lock state */
 	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
@@ -605,7 +630,7 @@ xfs_getbmap(
 	if (bmv->bmv_length == -1) {
 		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
 		bmv->bmv_length =
-			max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
+			max_t(int64_t, fixlen - bmv->bmv_offset, 0);
 	} else if (bmv->bmv_length == 0) {
 		bmv->bmv_entries = 0;
 		return 0;
@@ -742,7 +767,7 @@ xfs_getbmap(
 				out[cur_ext].bmv_offset +
 				out[cur_ext].bmv_length;
 			bmv->bmv_length =
-				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
+				max_t(int64_t, 0, bmvend - bmv->bmv_offset);
 
 			/*
 			 * In case we don't want to return the hole,
@@ -1617,7 +1642,7 @@ xfs_swap_extents_check_format(
 	 * extent format...
 	 */
 	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
-		if (XFS_IFORK_BOFF(ip) &&
+		if (XFS_IFORK_Q(ip) &&
 		    XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
 			return -EINVAL;
 		if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
@@ -1627,7 +1652,7 @@ xfs_swap_extents_check_format(
 
 	/* Reciprocal target->temp btree format checks */
 	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
-		if (XFS_IFORK_BOFF(tip) &&
+		if (XFS_IFORK_Q(tip) &&
 		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
 			return -EINVAL;
 		if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
@@ -1676,7 +1701,7 @@ xfs_swap_extent_rmap(
 	xfs_filblks_t			ilen;
 	xfs_filblks_t			rlen;
 	int				nimaps;
-	__uint64_t			tip_flags2;
+	uint64_t			tip_flags2;
 
 	/*
 	 * If the source file has shared blocks, we must flag the donor
@@ -1789,10 +1814,11 @@ xfs_swap_extent_forks(
 	int			*target_log_flags)
 {
 	struct xfs_ifork	tempifp, *ifp, *tifp;
-	int			aforkblks = 0;
-	int			taforkblks = 0;
+	xfs_filblks_t		aforkblks = 0;
+	xfs_filblks_t		taforkblks = 0;
+	xfs_extnum_t		junk;
 	xfs_extnum_t		nextents;
-	__uint64_t		tmp;
+	uint64_t		tmp;
 	int			error;
 
 	/*
@@ -1800,14 +1826,14 @@ xfs_swap_extent_forks(
 	 */
 	if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
 	     (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
-		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK,
+		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk,
 				&aforkblks);
 		if (error)
 			return error;
 	}
 	if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
 	     (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
-		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
+		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk,
 				&taforkblks);
 		if (error)
 			return error;
@@ -1850,15 +1876,15 @@ xfs_swap_extent_forks(
 	/*
 	 * Fix the on-disk inode values
 	 */
-	tmp = (__uint64_t)ip->i_d.di_nblocks;
+	tmp = (uint64_t)ip->i_d.di_nblocks;
 	ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
 	tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;
 
-	tmp = (__uint64_t) ip->i_d.di_nextents;
+	tmp = (uint64_t) ip->i_d.di_nextents;
 	ip->i_d.di_nextents = tip->i_d.di_nextents;
 	tip->i_d.di_nextents = tmp;
 
-	tmp = (__uint64_t) ip->i_d.di_format;
+	tmp = (uint64_t) ip->i_d.di_format;
 	ip->i_d.di_format = tip->i_d.di_format;
 	tip->i_d.di_format = tmp;
 
@@ -1927,7 +1953,7 @@ xfs_swap_extents(
 	int			error = 0;
 	int			lock_flags;
 	struct xfs_ifork	*cowfp;
-	__uint64_t		f;
+	uint64_t		f;
 	int			resblks;
 
 	/*

diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 135d826..0cede10 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h

@@ -70,4 +70,8 @@ int	xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
 
 xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
 
+int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
+			  int whichfork, xfs_extnum_t *nextents,
+			  xfs_filblks_t *count);
+
 #endif	/* __XFS_BMAP_UTIL_H__ */

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 438505f..72f0384 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c

@@ -1194,7 +1194,7 @@ xfs_buf_ioerror_alert(
 {
 	xfs_alert(bp->b_target->bt_mount,
 "metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d",
-		(__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
+		(uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
 }
 
 int
@@ -2050,6 +2050,66 @@ xfs_buf_delwri_submit(
 	return error;
 }
 
+/*
+ * Push a single buffer on a delwri queue.
+ *
+ * The purpose of this function is to submit a single buffer of a delwri queue
+ * and return with the buffer still on the original queue. The waiting delwri
+ * buffer submission infrastructure guarantees transfer of the delwri queue
+ * buffer reference to a temporary wait list. We reuse this infrastructure to
+ * transfer the buffer back to the original queue.
+ *
+ * Note the buffer transitions from the queued state, to the submitted and wait
+ * listed state and back to the queued state during this call. The buffer
+ * locking and queue management logic between _delwri_pushbuf() and
+ * _delwri_queue() guarantee that the buffer cannot be queued to another list
+ * before returning.
+ */
+int
+xfs_buf_delwri_pushbuf(
+	struct xfs_buf		*bp,
+	struct list_head	*buffer_list)
+{
+	LIST_HEAD		(submit_list);
+	int			error;
+
+	ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+
+	trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);
+
+	/*
+	 * Isolate the buffer to a new local list so we can submit it for I/O
+	 * independently from the rest of the original list.
+	 */
+	xfs_buf_lock(bp);
+	list_move(&bp->b_list, &submit_list);
+	xfs_buf_unlock(bp);
+
+	/*
+	 * Delwri submission clears the DELWRI_Q buffer flag and returns with
+	 * the buffer on the wait list with an associated reference. Rather than
+	 * bounce the buffer from a local wait list back to the original list
+	 * after I/O completion, reuse the original list as the wait list.
+	 */
+	xfs_buf_delwri_submit_buffers(&submit_list, buffer_list);
+
+	/*
+	 * The buffer is now under I/O and wait listed as during typical delwri
+	 * submission. Lock the buffer to wait for I/O completion. Rather than
+	 * remove the buffer from the wait list and release the reference, we
+	 * want to return with the buffer queued to the original list. The
+	 * buffer already sits on the original list with a wait list reference,
+	 * however. If we let the queue inherit that wait list reference, all we
+	 * need to do is reset the DELWRI_Q flag.
+	 */
+	xfs_buf_lock(bp);
+	error = bp->b_error;
+	bp->b_flags |= _XBF_DELWRI_Q;
+	xfs_buf_unlock(bp);
+
+	return error;
+}
+
 int __init
 xfs_buf_init(void)
 {

diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 1508121..2072126 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h

@@ -332,6 +332,7 @@ extern void xfs_buf_delwri_cancel(struct list_head *);
 extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
 extern int xfs_buf_delwri_submit(struct list_head *);
 extern int xfs_buf_delwri_submit_nowait(struct list_head *);
+extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *);
 
 /* Buffer Daemon Setup Routines */
 extern int xfs_buf_init(void);

diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 0306168..f6a8422 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c

@@ -636,20 +636,23 @@ xfs_buf_item_unlock(
 
 	/*
 	 * Clean buffers, by definition, cannot be in the AIL. However, aborted
-	 * buffers may be dirty and hence in the AIL. Therefore if we are
-	 * aborting a buffer and we've just taken the last refernce away, we
-	 * have to check if it is in the AIL before freeing it. We need to free
-	 * it in this case, because an aborted transaction has already shut the
-	 * filesystem down and this is the last chance we will have to do so.
+	 * buffers may be in the AIL regardless of dirty state. An aborted
+	 * transaction that invalidates a buffer already in the AIL may have
+	 * marked it stale and cleared the dirty state, for example.
+	 *
+	 * Therefore if we are aborting a buffer and we've just taken the last
+	 * reference away, we have to check if it is in the AIL before freeing
+	 * it. We need to free it in this case, because an aborted transaction
+	 * has already shut the filesystem down and this is the last chance we
+	 * will have to do so.
 	 */
 	if (atomic_dec_and_test(&bip->bli_refcount)) {
-		if (clean)
-			xfs_buf_item_relse(bp);
-		else if (aborted) {
+		if (aborted) {
 			ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
 			xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR);
 			xfs_buf_item_relse(bp);
-		}
+		} else if (clean)
+			xfs_buf_item_relse(bp);
 	}
 
 	if (!(flags & XFS_BLI_HOLD))

diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 20b7a5c..ba2638d 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c

@@ -44,7 +44,7 @@ static unsigned char xfs_dir3_filetype_table[] = {
 static unsigned char
 xfs_dir3_get_dtype(
 	struct xfs_mount	*mp,
-	__uint8_t		filetype)
+	uint8_t			filetype)
 {
 	if (!xfs_sb_version_hasftype(&mp->m_sb))
 		return DT_UNKNOWN;
@@ -117,7 +117,7 @@ xfs_dir2_sf_getdents(
 	 */
 	sfep = xfs_dir2_sf_firstentry(sfp);
 	for (i = 0; i < sfp->count; i++) {
-		__uint8_t filetype;
+		uint8_t filetype;
 
 		off = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
 				xfs_dir2_sf_get_offset(sfep));
@@ -170,7 +170,7 @@ xfs_dir2_block_getdents(
 		return 0;
 
 	lock_mode = xfs_ilock_data_map_shared(dp);
-	error = xfs_dir3_block_read(NULL, dp, &bp);
+	error = xfs_dir3_block_read(args->trans, dp, &bp);
 	xfs_iunlock(dp, lock_mode);
 	if (error)
 		return error;
@@ -194,7 +194,7 @@ xfs_dir2_block_getdents(
 	 * Each object is a real entry (dep) or an unused one (dup).
 	 */
 	while (ptr < endptr) {
-		__uint8_t filetype;
+		uint8_t filetype;
 
 		dup = (xfs_dir2_data_unused_t *)ptr;
 		/*
@@ -228,7 +228,7 @@ xfs_dir2_block_getdents(
 		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
 			    be64_to_cpu(dep->inumber),
 			    xfs_dir3_get_dtype(dp->i_mount, filetype))) {
-			xfs_trans_brelse(NULL, bp);
+			xfs_trans_brelse(args->trans, bp);
 			return 0;
 		}
 	}
@@ -239,218 +239,104 @@ xfs_dir2_block_getdents(
 	 */
 	ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) &
 								0x7fffffff;
-	xfs_trans_brelse(NULL, bp);
+	xfs_trans_brelse(args->trans, bp);
 	return 0;
 }
 
-struct xfs_dir2_leaf_map_info {
-	xfs_extlen_t	map_blocks;	/* number of fsbs in map */
-	xfs_dablk_t	map_off;	/* last mapped file offset */
-	int		map_size;	/* total entries in *map */
-	int		map_valid;	/* valid entries in *map */
-	int		nmap;		/* mappings to ask xfs_bmapi */
-	xfs_dir2_db_t	curdb;		/* db for current block */
-	int		ra_current;	/* number of read-ahead blks */
-	int		ra_index;	/* *map index for read-ahead */
-	int		ra_offset;	/* map entry offset for ra */
-	int		ra_want;	/* readahead count wanted */
-	struct xfs_bmbt_irec map[];	/* map vector for blocks */
-};
-
+/*
+ * Read a directory block and initiate readahead for blocks beyond that.
+ * We maintain a sliding readahead window of the remaining space in the
+ * buffer rounded up to the nearest block.
+ */
 STATIC int
 xfs_dir2_leaf_readbuf(
 	struct xfs_da_args	*args,
 	size_t			bufsize,
-	struct xfs_dir2_leaf_map_info *mip,
-	xfs_dir2_off_t		*curoff,
-	struct xfs_buf		**bpp,
-	bool			trim_map)
+	xfs_dir2_off_t		*cur_off,
+	xfs_dablk_t		*ra_blk,
+	struct xfs_buf		**bpp)
 {
 	struct xfs_inode	*dp = args->dp;
 	struct xfs_buf		*bp = NULL;
-	struct xfs_bmbt_irec	*map = mip->map;
-	struct blk_plug		plug;
-	int			error = 0;
-	int			length;
-	int			i;
-	int			j;
 	struct xfs_da_geometry	*geo = args->geo;
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK);
+	struct xfs_bmbt_irec	map;
+	struct blk_plug		plug;
+	xfs_dir2_off_t		new_off;
+	xfs_dablk_t		next_ra;
+	xfs_dablk_t		map_off;
+	xfs_dablk_t		last_da;
+	xfs_extnum_t		idx;
+	int			ra_want;
+	int			error = 0;
 
-	/*
-	 * If the caller just finished processing a buffer, it will tell us
-	 * we need to trim that block out of the mapping now it is done.
-	 */
-	if (trim_map) {
-		mip->map_blocks -= geo->fsbcount;
-		/*
-		 * Loop to get rid of the extents for the
-		 * directory block.
-		 */
-		for (i = geo->fsbcount; i > 0; ) {
-			j = min_t(int, map->br_blockcount, i);
-			map->br_blockcount -= j;
-			map->br_startblock += j;
-			map->br_startoff += j;
-			/*
-			 * If mapping is done, pitch it from
-			 * the table.
-			 */
-			if (!map->br_blockcount && --mip->map_valid)
-				memmove(&map[0], &map[1],
-					sizeof(map[0]) * mip->map_valid);
-			i -= j;
-		}
-	}
-
-	/*
-	 * Recalculate the readahead blocks wanted.
-	 */
-	mip->ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)) - 1;
-	ASSERT(mip->ra_want >= 0);
-
-	/*
-	 * If we don't have as many as we want, and we haven't
-	 * run out of data blocks, get some more mappings.
-	 */
-	if (1 + mip->ra_want > mip->map_blocks &&
-	    mip->map_off < xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET)) {
-		/*
-		 * Get more bmaps, fill in after the ones
-		 * we already have in the table.
-		 */
-		mip->nmap = mip->map_size - mip->map_valid;
-		error = xfs_bmapi_read(dp, mip->map_off,
-				xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET) -
-								mip->map_off,
-				&map[mip->map_valid], &mip->nmap, 0);
-
-		/*
-		 * Don't know if we should ignore this or try to return an
-		 * error.  The trouble with returning errors is that readdir
-		 * will just stop without actually passing the error through.
-		 */
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(args->trans, dp, XFS_DATA_FORK);
 		if (error)
-			goto out;	/* XXX */
-
-		/*
-		 * If we got all the mappings we asked for, set the final map
-		 * offset based on the last bmap value received.  Otherwise,
-		 * we've reached the end.
-		 */
-		if (mip->nmap == mip->map_size - mip->map_valid) {
-			i = mip->map_valid + mip->nmap - 1;
-			mip->map_off = map[i].br_startoff + map[i].br_blockcount;
-		} else
-			mip->map_off = xfs_dir2_byte_to_da(geo,
-							XFS_DIR2_LEAF_OFFSET);
-
-		/*
-		 * Look for holes in the mapping, and eliminate them.  Count up
-		 * the valid blocks.
-		 */
-		for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) {
-			if (map[i].br_startblock == HOLESTARTBLOCK) {
-				mip->nmap--;
-				length = mip->map_valid + mip->nmap - i;
-				if (length)
-					memmove(&map[i], &map[i + 1],
-						sizeof(map[i]) * length);
-			} else {
-				mip->map_blocks += map[i].br_blockcount;
-				i++;
-			}
-		}
-		mip->map_valid += mip->nmap;
+			goto out;
 	}
 
 	/*
-	 * No valid mappings, so no more data blocks.
+	 * Look for mapped directory blocks at or above the current offset.
+	 * Truncate down to the nearest directory block to start the scanning
+	 * operation.
 	 */
-	if (!mip->map_valid) {
-		*curoff = xfs_dir2_da_to_byte(geo, mip->map_off);
+	last_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET);
+	map_off = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *cur_off));
+	if (!xfs_iext_lookup_extent(dp, ifp, map_off, &idx, &map))
 		goto out;
-	}
+	if (map.br_startoff >= last_da)
+		goto out;
+	xfs_trim_extent(&map, map_off, last_da - map_off);
 
-	/*
-	 * Read the directory block starting at the first mapping.
-	 */
-	mip->curdb = xfs_dir2_da_to_db(geo, map->br_startoff);
-	error = xfs_dir3_data_read(NULL, dp, map->br_startoff,
-			map->br_blockcount >= geo->fsbcount ?
-			    XFS_FSB_TO_DADDR(dp->i_mount, map->br_startblock) :
-			    -1, &bp);
-	/*
-	 * Should just skip over the data block instead of giving up.
-	 */
+	/* Read the directory block of that first mapping. */
+	new_off = xfs_dir2_da_to_byte(geo, map.br_startoff);
+	if (new_off > *cur_off)
+		*cur_off = new_off;
+	error = xfs_dir3_data_read(args->trans, dp, map.br_startoff, -1, &bp);
 	if (error)
-		goto out;	/* XXX */
+		goto out;
 
 	/*
-	 * Adjust the current amount of read-ahead: we just read a block that
-	 * was previously ra.
+	 * Start readahead for the next bufsize's worth of dir data blocks.
+	 * We may have already issued readahead for some of that range;
+	 * ra_blk tracks the last block we tried to read(ahead).
 	 */
-	if (mip->ra_current)
-		mip->ra_current -= geo->fsbcount;
+	ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog));
+	if (*ra_blk >= last_da)
+		goto out;
+	else if (*ra_blk == 0)
+		*ra_blk = map.br_startoff;
+	next_ra = map.br_startoff + geo->fsbcount;
+	if (next_ra >= last_da)
+		goto out_no_ra;
+	if (map.br_blockcount < geo->fsbcount &&
+	    !xfs_iext_get_extent(ifp, ++idx, &map))
+		goto out_no_ra;
+	if (map.br_startoff >= last_da)
+		goto out_no_ra;
+	xfs_trim_extent(&map, next_ra, last_da - next_ra);
 
-	/*
-	 * Do we need more readahead?
-	 * Each loop tries to process 1 full dir blk; last may be partial.
-	 */
+	/* Start ra for each dir (not fs) block that has a mapping. */
 	blk_start_plug(&plug);
-	for (mip->ra_index = mip->ra_offset = i = 0;
-	     mip->ra_want > mip->ra_current && i < mip->map_blocks;
-	     i += geo->fsbcount) {
-		ASSERT(mip->ra_index < mip->map_valid);
-		/*
-		 * Read-ahead a contiguous directory block.
-		 */
-		if (i > mip->ra_current &&
-		    (map[mip->ra_index].br_blockcount - mip->ra_offset) >=
-		    geo->fsbcount) {
-			xfs_dir3_data_readahead(dp,
-				map[mip->ra_index].br_startoff + mip->ra_offset,
-				XFS_FSB_TO_DADDR(dp->i_mount,
-					map[mip->ra_index].br_startblock +
-							mip->ra_offset));
-			mip->ra_current = i;
-		}
-
-		/*
-		 * Read-ahead a non-contiguous directory block.  This doesn't
-		 * use our mapping, but this is a very rare case.
-		 */
-		else if (i > mip->ra_current) {
-			xfs_dir3_data_readahead(dp,
-					map[mip->ra_index].br_startoff +
-							mip->ra_offset, -1);
-			mip->ra_current = i;
-		}
-
-		/*
-		 * Advance offset through the mapping table, processing a full
-		 * dir block even if it is fragmented into several extents.
-		 * But stop if we have consumed all valid mappings, even if
-		 * it's not yet a full directory block.
-		 */
-		for (j = 0;
-		     j < geo->fsbcount && mip->ra_index < mip->map_valid;
-		     j += length ) {
-			/*
-			 * The rest of this extent but not more than a dir
-			 * block.
-			 */
-			length = min_t(int, geo->fsbcount - j,
-					map[mip->ra_index].br_blockcount -
-							mip->ra_offset);
-			mip->ra_offset += length;
-
-			/*
-			 * Advance to the next mapping if this one is used up.
-			 */
-			if (mip->ra_offset == map[mip->ra_index].br_blockcount) {
-				mip->ra_offset = 0;
-				mip->ra_index++;
+	while (ra_want > 0) {
+		next_ra = roundup((xfs_dablk_t)map.br_startoff, geo->fsbcount);
+		while (ra_want > 0 &&
+		       next_ra < map.br_startoff + map.br_blockcount) {
+			if (next_ra >= last_da) {
+				*ra_blk = last_da;
+				break;
 			}
+			if (next_ra > *ra_blk) {
+				xfs_dir3_data_readahead(dp, next_ra, -2);
+				*ra_blk = next_ra;
+			}
+			ra_want -= geo->fsbcount;
+			next_ra += geo->fsbcount;
+		}
+		if (!xfs_iext_get_extent(ifp, ++idx, &map)) {
+			*ra_blk = last_da;
+			break;
 		}
 	}
 	blk_finish_plug(&plug);
@@ -458,6 +344,9 @@ xfs_dir2_leaf_readbuf(
 out:
 	*bpp = bp;
 	return error;
+out_no_ra:
+	*ra_blk = last_da;
+	goto out;
 }
 
 /*
@@ -475,14 +364,14 @@ xfs_dir2_leaf_getdents(
 	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
 	xfs_dir2_data_entry_t	*dep;		/* data entry */
 	xfs_dir2_data_unused_t	*dup;		/* unused entry */
-	int			error = 0;	/* error return value */
+	char			*ptr = NULL;	/* pointer to current data */
+	struct xfs_da_geometry	*geo = args->geo;
+	xfs_dablk_t		rablk = 0;	/* current readahead block */
+	xfs_dir2_off_t		curoff;		/* current overall offset */
 	int			length;		/* temporary length value */
 	int			byteoff;	/* offset in current block */
-	xfs_dir2_off_t		curoff;		/* current overall offset */
-	xfs_dir2_off_t		newoff;		/* new curoff after new blk */
-	char			*ptr = NULL;	/* pointer to current data */
-	struct xfs_dir2_leaf_map_info *map_info;
-	struct xfs_da_geometry	*geo = args->geo;
+	int			lock_mode;
+	int			error = 0;	/* error return value */
 
 	/*
 	 * If the offset is at or past the largest allowed value,
@@ -492,73 +381,35 @@ xfs_dir2_leaf_getdents(
 		return 0;
 
 	/*
-	 * Set up to bmap a number of blocks based on the caller's
-	 * buffer size, the directory block size, and the filesystem
-	 * block size.
-	 */
-	length = howmany(bufsize + geo->blksize, (1 << geo->fsblog));
-	map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
-				(length * sizeof(struct xfs_bmbt_irec)),
-			       KM_SLEEP | KM_NOFS);
-	map_info->map_size = length;
-
-	/*
 	 * Inside the loop we keep the main offset value as a byte offset
 	 * in the directory file.
 	 */
 	curoff = xfs_dir2_dataptr_to_byte(ctx->pos);
 
 	/*
-	 * Force this conversion through db so we truncate the offset
-	 * down to get the start of the data block.
-	 */
-	map_info->map_off = xfs_dir2_db_to_da(geo,
-					      xfs_dir2_byte_to_db(geo, curoff));
-
-	/*
 	 * Loop over directory entries until we reach the end offset.
 	 * Get more blocks and readahead as necessary.
 	 */
 	while (curoff < XFS_DIR2_LEAF_OFFSET) {
-		__uint8_t filetype;
+		uint8_t filetype;
 
 		/*
 		 * If we have no buffer, or we're off the end of the
 		 * current buffer, need to get another one.
 		 */
 		if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) {
-			int	lock_mode;
-			bool	trim_map = false;
-
 			if (bp) {
-				xfs_trans_brelse(NULL, bp);
+				xfs_trans_brelse(args->trans, bp);
 				bp = NULL;
-				trim_map = true;
 			}
 
 			lock_mode = xfs_ilock_data_map_shared(dp);
-			error = xfs_dir2_leaf_readbuf(args, bufsize, map_info,
-						      &curoff, &bp, trim_map);
+			error = xfs_dir2_leaf_readbuf(args, bufsize, &curoff,
+					&rablk, &bp);
 			xfs_iunlock(dp, lock_mode);
-			if (error || !map_info->map_valid)
+			if (error || !bp)
 				break;
 
-			/*
-			 * Having done a read, we need to set a new offset.
-			 */
-			newoff = xfs_dir2_db_off_to_byte(geo,
-							 map_info->curdb, 0);
-			/*
-			 * Start of the current block.
-			 */
-			if (curoff < newoff)
-				curoff = newoff;
-			/*
-			 * Make sure we're in the right block.
-			 */
-			else if (curoff > newoff)
-				ASSERT(xfs_dir2_byte_to_db(geo, curoff) ==
-				       map_info->curdb);
 			hdr = bp->b_addr;
 			xfs_dir3_data_check(dp, bp);
 			/*
@@ -643,17 +494,22 @@ xfs_dir2_leaf_getdents(
 		ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
 	else
 		ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
-	kmem_free(map_info);
 	if (bp)
-		xfs_trans_brelse(NULL, bp);
+		xfs_trans_brelse(args->trans, bp);
 	return error;
 }
 
 /*
  * Read a directory.
+ *
+ * If supplied, the transaction collects locked dir buffers to avoid
+ * nested buffer deadlocks.  This function does not dirty the
+ * transaction.  The caller should ensure that the inode is locked
+ * before calling this function.
  */
 int
 xfs_readdir(
+	struct xfs_trans	*tp,
 	struct xfs_inode	*dp,
 	struct dir_context	*ctx,
 	size_t			bufsize)
@@ -672,6 +528,7 @@ xfs_readdir(
 
 	args.dp = dp;
 	args.geo = dp->i_mount->m_dir_geo;
+	args.trans = tp;
 
 	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
 		rval = xfs_dir2_sf_getdents(&args, ctx);

diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 6a05d27..b2cde54 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c

@@ -39,7 +39,7 @@ xfs_trim_extents(
 	xfs_daddr_t		start,
 	xfs_daddr_t		end,
 	xfs_daddr_t		minlen,
-	__uint64_t		*blocks_trimmed)
+	uint64_t		*blocks_trimmed)
 {
 	struct block_device	*bdev = mp->m_ddev_targp->bt_bdev;
 	struct xfs_btree_cur	*cur;
@@ -166,7 +166,7 @@ xfs_ioc_trim(
 	struct fstrim_range	range;
 	xfs_daddr_t		start, end, minlen;
 	xfs_agnumber_t		start_agno, end_agno, agno;
-	__uint64_t		blocks_trimmed = 0;
+	uint64_t		blocks_trimmed = 0;
 	int			error, last_error = 0;
 
 	if (!capable(CAP_SYS_ADMIN))

diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 9d06cc3..f89f7b52 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c

@@ -276,7 +276,7 @@ xfs_qm_init_dquot_blk(
 void
 xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
 {
-	__uint64_t space;
+	uint64_t space;
 
 	dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
 	dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit);
@@ -472,18 +472,23 @@ xfs_qm_dqtobp(
 	struct xfs_mount	*mp = dqp->q_mount;
 	xfs_dqid_t		id = be32_to_cpu(dqp->q_core.d_id);
 	struct xfs_trans	*tp = (tpp ? *tpp : NULL);
-	uint			lock_mode;
+	uint			lock_mode = 0;
 
 	quotip = xfs_quota_inode(dqp->q_mount, dqp->dq_flags);
 	dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
 
-	lock_mode = xfs_ilock_data_map_shared(quotip);
+	ASSERT(!(flags & XFS_QMOPT_NOLOCK) ||
+		xfs_isilocked(quotip, XFS_ILOCK_SHARED) ||
+		xfs_isilocked(quotip, XFS_ILOCK_EXCL));
+	if (!(flags & XFS_QMOPT_NOLOCK))
+		lock_mode = xfs_ilock_data_map_shared(quotip);
 	if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
 		/*
 		 * Return if this type of quotas is turned off while we
 		 * didn't have the quota inode lock.
 		 */
-		xfs_iunlock(quotip, lock_mode);
+		if (lock_mode)
+			xfs_iunlock(quotip, lock_mode);
 		return -ESRCH;
 	}
 
@@ -493,7 +498,8 @@ xfs_qm_dqtobp(
 	error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
 			       XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);
 
-	xfs_iunlock(quotip, lock_mode);
+	if (lock_mode)
+		xfs_iunlock(quotip, lock_mode);
 	if (error)
 		return error;
 
@@ -695,21 +701,18 @@ xfs_qm_dqread(
  */
 static int
 xfs_dq_get_next_id(
-	xfs_mount_t		*mp,
+	struct xfs_mount	*mp,
 	uint			type,
-	xfs_dqid_t		*id,
-	loff_t			eof)
+	xfs_dqid_t		*id)
 {
-	struct xfs_inode	*quotip;
+	struct xfs_inode	*quotip = xfs_quota_inode(mp, type);
+	xfs_dqid_t		next_id = *id + 1; /* simple advance */
+	uint			lock_flags;
+	struct xfs_bmbt_irec	got;
+	xfs_extnum_t		idx;
 	xfs_fsblock_t		start;
-	loff_t			offset;
-	uint			lock;
-	xfs_dqid_t		next_id;
 	int			error = 0;
 
-	/* Simple advance */
-	next_id = *id + 1;
-
 	/* If we'd wrap past the max ID, stop */
 	if (next_id < *id)
 		return -ENOENT;
@@ -723,23 +726,25 @@ xfs_dq_get_next_id(
 	/* Nope, next_id is now past the current chunk, so find the next one */
 	start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk;
 
-	quotip = xfs_quota_inode(mp, type);
-	lock = xfs_ilock_data_map_shared(quotip);
+	lock_flags = xfs_ilock_data_map_shared(quotip);
+	if (!(quotip->i_df.if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(NULL, quotip, XFS_DATA_FORK);
+		if (error)
+			return error;
+	}
 
-	offset = __xfs_seek_hole_data(VFS_I(quotip), XFS_FSB_TO_B(mp, start),
-				      eof, SEEK_DATA);
-	if (offset < 0)
-		error = offset;
+	if (xfs_iext_lookup_extent(quotip, &quotip->i_df, start, &idx, &got)) {
+		/* contiguous chunk, bump startoff for the id calculation */
+		if (got.br_startoff < start)
+			got.br_startoff = start;
+		*id = got.br_startoff * mp->m_quotainfo->qi_dqperchunk;
+	} else {
+		error = -ENOENT;
+	}
 
-	xfs_iunlock(quotip, lock);
+	xfs_iunlock(quotip, lock_flags);
 
-	/* -ENXIO is essentially "no more data" */
-	if (error)
-		return (error == -ENXIO ? -ENOENT: error);
-
-	/* Convert next data offset back to a quota id */
-	*id = XFS_B_TO_FSB(mp, offset) * mp->m_quotainfo->qi_dqperchunk;
-	return 0;
+	return error;
 }
 
 /*
@@ -762,7 +767,6 @@ xfs_qm_dqget(
 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
 	struct radix_tree_root *tree = xfs_dquot_tree(qi, type);
 	struct xfs_dquot	*dqp;
-	loff_t			eof = 0;
 	int			error;
 
 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -790,21 +794,6 @@ xfs_qm_dqget(
 	}
 #endif
 
-	/* Get the end of the quota file if we need it */
-	if (flags & XFS_QMOPT_DQNEXT) {
-		struct xfs_inode	*quotip;
-		xfs_fileoff_t		last;
-		uint			lock_mode;
-
-		quotip = xfs_quota_inode(mp, type);
-		lock_mode = xfs_ilock_data_map_shared(quotip);
-		error = xfs_bmap_last_offset(quotip, &last, XFS_DATA_FORK);
-		xfs_iunlock(quotip, lock_mode);
-		if (error)
-			return error;
-		eof = XFS_FSB_TO_B(mp, last);
-	}
-
 restart:
 	mutex_lock(&qi->qi_tree_lock);
 	dqp = radix_tree_lookup(tree, id);
@@ -823,7 +812,7 @@ xfs_qm_dqget(
 			if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
 				xfs_dqunlock(dqp);
 				mutex_unlock(&qi->qi_tree_lock);
-				error = xfs_dq_get_next_id(mp, type, &id, eof);
+				error = xfs_dq_get_next_id(mp, type, &id);
 				if (error)
 					return error;
 				goto restart;
@@ -858,7 +847,7 @@ xfs_qm_dqget(
 
 	/* If we are asked to find next active id, keep looking */
 	if (error == -ENOENT && (flags & XFS_QMOPT_DQNEXT)) {
-		error = xfs_dq_get_next_id(mp, type, &id, eof);
+		error = xfs_dq_get_next_id(mp, type, &id);
 		if (!error)
 			goto restart;
 	}
@@ -917,7 +906,7 @@ xfs_qm_dqget(
 	if (flags & XFS_QMOPT_DQNEXT) {
 		if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
 			xfs_qm_dqput(dqp);
-			error = xfs_dq_get_next_id(mp, type, &id, eof);
+			error = xfs_dq_get_next_id(mp, type, &id);
 			if (error)
 				return error;
 			goto restart;

diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index ed7ee4e..2f4feb9 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c

@@ -22,103 +22,280 @@
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
 #include "xfs_error.h"
+#include "xfs_sysfs.h"
 
 #ifdef DEBUG
 
-int	xfs_etest[XFS_NUM_INJECT_ERROR];
-int64_t	xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
-char *	xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
-int	xfs_error_test_active;
+static unsigned int xfs_errortag_random_default[] = {
+	XFS_RANDOM_DEFAULT,
+	XFS_RANDOM_IFLUSH_1,
+	XFS_RANDOM_IFLUSH_2,
+	XFS_RANDOM_IFLUSH_3,
+	XFS_RANDOM_IFLUSH_4,
+	XFS_RANDOM_IFLUSH_5,
+	XFS_RANDOM_IFLUSH_6,
+	XFS_RANDOM_DA_READ_BUF,
+	XFS_RANDOM_BTREE_CHECK_LBLOCK,
+	XFS_RANDOM_BTREE_CHECK_SBLOCK,
+	XFS_RANDOM_ALLOC_READ_AGF,
+	XFS_RANDOM_IALLOC_READ_AGI,
+	XFS_RANDOM_ITOBP_INOTOBP,
+	XFS_RANDOM_IUNLINK,
+	XFS_RANDOM_IUNLINK_REMOVE,
+	XFS_RANDOM_DIR_INO_VALIDATE,
+	XFS_RANDOM_BULKSTAT_READ_CHUNK,
+	XFS_RANDOM_IODONE_IOERR,
+	XFS_RANDOM_STRATREAD_IOERR,
+	XFS_RANDOM_STRATCMPL_IOERR,
+	XFS_RANDOM_DIOWRITE_IOERR,
+	XFS_RANDOM_BMAPIFORMAT,
+	XFS_RANDOM_FREE_EXTENT,
+	XFS_RANDOM_RMAP_FINISH_ONE,
+	XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE,
+	XFS_RANDOM_REFCOUNT_FINISH_ONE,
+	XFS_RANDOM_BMAP_FINISH_ONE,
+	XFS_RANDOM_AG_RESV_CRITICAL,
+	XFS_RANDOM_DROP_WRITES,
+	XFS_RANDOM_LOG_BAD_CRC,
+};
 
-int
-xfs_error_test(int error_tag, int *fsidp, char *expression,
-	       int line, char *file, unsigned long randfactor)
+struct xfs_errortag_attr {
+	struct attribute	attr;
+	unsigned int		tag;
+};
+
+static inline struct xfs_errortag_attr *
+to_attr(struct attribute *attr)
 {
-	int i;
-	int64_t fsid;
+	return container_of(attr, struct xfs_errortag_attr, attr);
+}
 
-	if (prandom_u32() % randfactor)
-		return 0;
+static inline struct xfs_mount *
+to_mp(struct kobject *kobject)
+{
+	struct xfs_kobj *kobj = to_kobj(kobject);
 
-	memcpy(&fsid, fsidp, sizeof(xfs_fsid_t));
+	return container_of(kobj, struct xfs_mount, m_errortag_kobj);
+}
 
-	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++)  {
-		if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) {
-			xfs_warn(NULL,
-	"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
-				expression, file, line, xfs_etest_fsname[i]);
-			return 1;
-		}
+STATIC ssize_t
+xfs_errortag_attr_store(
+	struct kobject		*kobject,
+	struct attribute	*attr,
+	const char		*buf,
+	size_t			count)
+{
+	struct xfs_mount	*mp = to_mp(kobject);
+	struct xfs_errortag_attr *xfs_attr = to_attr(attr);
+	int			ret;
+	unsigned int		val;
+
+	if (strcmp(buf, "default") == 0) {
+		val = xfs_errortag_random_default[xfs_attr->tag];
+	} else {
+		ret = kstrtouint(buf, 0, &val);
+		if (ret)
+			return ret;
 	}
 
+	ret = xfs_errortag_set(mp, xfs_attr->tag, val);
+	if (ret)
+		return ret;
+	return count;
+}
+
+STATIC ssize_t
+xfs_errortag_attr_show(
+	struct kobject		*kobject,
+	struct attribute	*attr,
+	char			*buf)
+{
+	struct xfs_mount	*mp = to_mp(kobject);
+	struct xfs_errortag_attr *xfs_attr = to_attr(attr);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n",
+			xfs_errortag_get(mp, xfs_attr->tag));
+}
+
+static const struct sysfs_ops xfs_errortag_sysfs_ops = {
+	.show = xfs_errortag_attr_show,
+	.store = xfs_errortag_attr_store,
+};
+
+#define XFS_ERRORTAG_ATTR_RW(_name, _tag) \
+static struct xfs_errortag_attr xfs_errortag_attr_##_name = {		\
+	.attr = {.name = __stringify(_name),				\
+		 .mode = VERIFY_OCTAL_PERMISSIONS(S_IWUSR | S_IRUGO) },	\
+	.tag	= (_tag),						\
+}
+
+#define XFS_ERRORTAG_ATTR_LIST(_name) &xfs_errortag_attr_##_name.attr
+
+XFS_ERRORTAG_ATTR_RW(noerror,		XFS_ERRTAG_NOERROR);
+XFS_ERRORTAG_ATTR_RW(iflush1,		XFS_ERRTAG_IFLUSH_1);
+XFS_ERRORTAG_ATTR_RW(iflush2,		XFS_ERRTAG_IFLUSH_2);
+XFS_ERRORTAG_ATTR_RW(iflush3,		XFS_ERRTAG_IFLUSH_3);
+XFS_ERRORTAG_ATTR_RW(iflush4,		XFS_ERRTAG_IFLUSH_4);
+XFS_ERRORTAG_ATTR_RW(iflush5,		XFS_ERRTAG_IFLUSH_5);
+XFS_ERRORTAG_ATTR_RW(iflush6,		XFS_ERRTAG_IFLUSH_6);
+XFS_ERRORTAG_ATTR_RW(dareadbuf,		XFS_ERRTAG_DA_READ_BUF);
+XFS_ERRORTAG_ATTR_RW(btree_chk_lblk,	XFS_ERRTAG_BTREE_CHECK_LBLOCK);
+XFS_ERRORTAG_ATTR_RW(btree_chk_sblk,	XFS_ERRTAG_BTREE_CHECK_SBLOCK);
+XFS_ERRORTAG_ATTR_RW(readagf,		XFS_ERRTAG_ALLOC_READ_AGF);
+XFS_ERRORTAG_ATTR_RW(readagi,		XFS_ERRTAG_IALLOC_READ_AGI);
+XFS_ERRORTAG_ATTR_RW(itobp,		XFS_ERRTAG_ITOBP_INOTOBP);
+XFS_ERRORTAG_ATTR_RW(iunlink,		XFS_ERRTAG_IUNLINK);
+XFS_ERRORTAG_ATTR_RW(iunlinkrm,		XFS_ERRTAG_IUNLINK_REMOVE);
+XFS_ERRORTAG_ATTR_RW(dirinovalid,	XFS_ERRTAG_DIR_INO_VALIDATE);
+XFS_ERRORTAG_ATTR_RW(bulkstat,		XFS_ERRTAG_BULKSTAT_READ_CHUNK);
+XFS_ERRORTAG_ATTR_RW(logiodone,		XFS_ERRTAG_IODONE_IOERR);
+XFS_ERRORTAG_ATTR_RW(stratread,		XFS_ERRTAG_STRATREAD_IOERR);
+XFS_ERRORTAG_ATTR_RW(stratcmpl,		XFS_ERRTAG_STRATCMPL_IOERR);
+XFS_ERRORTAG_ATTR_RW(diowrite,		XFS_ERRTAG_DIOWRITE_IOERR);
+XFS_ERRORTAG_ATTR_RW(bmapifmt,		XFS_ERRTAG_BMAPIFORMAT);
+XFS_ERRORTAG_ATTR_RW(free_extent,	XFS_ERRTAG_FREE_EXTENT);
+XFS_ERRORTAG_ATTR_RW(rmap_finish_one,	XFS_ERRTAG_RMAP_FINISH_ONE);
+XFS_ERRORTAG_ATTR_RW(refcount_continue_update,	XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE);
+XFS_ERRORTAG_ATTR_RW(refcount_finish_one,	XFS_ERRTAG_REFCOUNT_FINISH_ONE);
+XFS_ERRORTAG_ATTR_RW(bmap_finish_one,	XFS_ERRTAG_BMAP_FINISH_ONE);
+XFS_ERRORTAG_ATTR_RW(ag_resv_critical,	XFS_ERRTAG_AG_RESV_CRITICAL);
+XFS_ERRORTAG_ATTR_RW(drop_writes,	XFS_ERRTAG_DROP_WRITES);
+XFS_ERRORTAG_ATTR_RW(log_bad_crc,	XFS_ERRTAG_LOG_BAD_CRC);
+
+static struct attribute *xfs_errortag_attrs[] = {
+	XFS_ERRORTAG_ATTR_LIST(noerror),
+	XFS_ERRORTAG_ATTR_LIST(iflush1),
+	XFS_ERRORTAG_ATTR_LIST(iflush2),
+	XFS_ERRORTAG_ATTR_LIST(iflush3),
+	XFS_ERRORTAG_ATTR_LIST(iflush4),
+	XFS_ERRORTAG_ATTR_LIST(iflush5),
+	XFS_ERRORTAG_ATTR_LIST(iflush6),
+	XFS_ERRORTAG_ATTR_LIST(dareadbuf),
+	XFS_ERRORTAG_ATTR_LIST(btree_chk_lblk),
+	XFS_ERRORTAG_ATTR_LIST(btree_chk_sblk),
+	XFS_ERRORTAG_ATTR_LIST(readagf),
+	XFS_ERRORTAG_ATTR_LIST(readagi),
+	XFS_ERRORTAG_ATTR_LIST(itobp),
+	XFS_ERRORTAG_ATTR_LIST(iunlink),
+	XFS_ERRORTAG_ATTR_LIST(iunlinkrm),
+	XFS_ERRORTAG_ATTR_LIST(dirinovalid),
+	XFS_ERRORTAG_ATTR_LIST(bulkstat),
+	XFS_ERRORTAG_ATTR_LIST(logiodone),
+	XFS_ERRORTAG_ATTR_LIST(stratread),
+	XFS_ERRORTAG_ATTR_LIST(stratcmpl),
+	XFS_ERRORTAG_ATTR_LIST(diowrite),
+	XFS_ERRORTAG_ATTR_LIST(bmapifmt),
+	XFS_ERRORTAG_ATTR_LIST(free_extent),
+	XFS_ERRORTAG_ATTR_LIST(rmap_finish_one),
+	XFS_ERRORTAG_ATTR_LIST(refcount_continue_update),
+	XFS_ERRORTAG_ATTR_LIST(refcount_finish_one),
+	XFS_ERRORTAG_ATTR_LIST(bmap_finish_one),
+	XFS_ERRORTAG_ATTR_LIST(ag_resv_critical),
+	XFS_ERRORTAG_ATTR_LIST(drop_writes),
+	XFS_ERRORTAG_ATTR_LIST(log_bad_crc),
+	NULL,
+};
+
+struct kobj_type xfs_errortag_ktype = {
+	.release = xfs_sysfs_release,
+	.sysfs_ops = &xfs_errortag_sysfs_ops,
+	.default_attrs = xfs_errortag_attrs,
+};
+
+int
+xfs_errortag_init(
+	struct xfs_mount	*mp)
+{
+	mp->m_errortag = kmem_zalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX,
+			KM_SLEEP | KM_MAYFAIL);
+	if (!mp->m_errortag)
+		return -ENOMEM;
+
+	return xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype,
+			       &mp->m_kobj, "errortag");
+}
+
+void
+xfs_errortag_del(
+	struct xfs_mount	*mp)
+{
+	xfs_sysfs_del(&mp->m_errortag_kobj);
+	kmem_free(mp->m_errortag);
+}
+
+bool
+xfs_errortag_test(
+	struct xfs_mount	*mp,
+	const char		*expression,
+	const char		*file,
+	int			line,
+	unsigned int		error_tag)
+{
+	unsigned int		randfactor;
+
+	/*
+	 * To be able to use error injection anywhere, we need to ensure the
+	 * error injection mechanism is already initialized.
+	 *
+	 * Code paths like I/O completion can be called before the
+	 * initialization is complete, but being able to inject errors in such
+	 * places is still useful.
+	 */
+	if (!mp->m_errortag)
+		return false;
+
+	ASSERT(error_tag < XFS_ERRTAG_MAX);
+	randfactor = mp->m_errortag[error_tag];
+	if (!randfactor || prandom_u32() % randfactor)
+		return false;
+
+	xfs_warn_ratelimited(mp,
+"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
+			expression, file, line, mp->m_fsname);
+	return true;
+}
+
+int
+xfs_errortag_get(
+	struct xfs_mount	*mp,
+	unsigned int		error_tag)
+{
+	if (error_tag >= XFS_ERRTAG_MAX)
+		return -EINVAL;
+
+	return mp->m_errortag[error_tag];
+}
+
+int
+xfs_errortag_set(
+	struct xfs_mount	*mp,
+	unsigned int		error_tag,
+	unsigned int		tag_value)
+{
+	if (error_tag >= XFS_ERRTAG_MAX)
+		return -EINVAL;
+
+	mp->m_errortag[error_tag] = tag_value;
 	return 0;
 }
 
 int
-xfs_errortag_add(unsigned int error_tag, xfs_mount_t *mp)
+xfs_errortag_add(
+	struct xfs_mount	*mp,
+	unsigned int		error_tag)
 {
-	int i;
-	int len;
-	int64_t fsid;
-
 	if (error_tag >= XFS_ERRTAG_MAX)
 		return -EINVAL;
 
-	memcpy(&fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t));
-
-	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++)  {
-		if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) {
-			xfs_warn(mp, "error tag #%d on", error_tag);
-			return 0;
-		}
-	}
-
-	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++)  {
-		if (xfs_etest[i] == 0) {
-			xfs_warn(mp, "Turned on XFS error tag #%d",
-				error_tag);
-			xfs_etest[i] = error_tag;
-			xfs_etest_fsid[i] = fsid;
-			len = strlen(mp->m_fsname);
-			xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP);
-			strcpy(xfs_etest_fsname[i], mp->m_fsname);
-			xfs_error_test_active++;
-			return 0;
-		}
-	}
-
-	xfs_warn(mp, "error tag overflow, too many turned on");
-
-	return 1;
+	return xfs_errortag_set(mp, error_tag,
+			xfs_errortag_random_default[error_tag]);
 }
 
 int
-xfs_errortag_clearall(xfs_mount_t *mp, int loud)
+xfs_errortag_clearall(
+	struct xfs_mount	*mp)
 {
-	int64_t fsid;
-	int cleared = 0;
-	int i;
-
-	memcpy(&fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t));
-
-
-	for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
-		if ((fsid == 0LL || xfs_etest_fsid[i] == fsid) &&
-		     xfs_etest[i] != 0) {
-			cleared = 1;
-			xfs_warn(mp, "Clearing XFS error tag #%d",
-				xfs_etest[i]);
-			xfs_etest[i] = 0;
-			xfs_etest_fsid[i] = 0LL;
-			kmem_free(xfs_etest_fsname[i]);
-			xfs_etest_fsname[i] = NULL;
-			xfs_error_test_active--;
-		}
-	}
-
-	if (loud || cleared)
-		xfs_warn(mp, "Cleared all XFS error tags for filesystem");
-
+	memset(mp->m_errortag, 0, sizeof(unsigned int) * XFS_ERRTAG_MAX);
 	return 0;
 }
 #endif /* DEBUG */

diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 05f8666..7577be5 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h

@@ -96,7 +96,17 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
 #define XFS_ERRTAG_REFCOUNT_FINISH_ONE			25
 #define XFS_ERRTAG_BMAP_FINISH_ONE			26
 #define XFS_ERRTAG_AG_RESV_CRITICAL			27
-#define XFS_ERRTAG_MAX					28
+/*
+ * DEBUG mode instrumentation to test and/or trigger delayed allocation
+ * block killing in the event of failed writes. When enabled, all
+ * buffered writes are silently dropped and handled as if they failed.
+ * All delalloc blocks in the range of the write (including pre-existing
+ * delalloc blocks!) are tossed as part of the write failure error
+ * handling sequence.
+ */
+#define XFS_ERRTAG_DROP_WRITES				28
+#define XFS_ERRTAG_LOG_BAD_CRC				29
+#define XFS_ERRTAG_MAX					30
 
 /*
  * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -129,23 +139,29 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
 #define XFS_RANDOM_REFCOUNT_FINISH_ONE			1
 #define XFS_RANDOM_BMAP_FINISH_ONE			1
 #define XFS_RANDOM_AG_RESV_CRITICAL			4
+#define XFS_RANDOM_DROP_WRITES				1
+#define XFS_RANDOM_LOG_BAD_CRC				1
 
 #ifdef DEBUG
-extern int xfs_error_test_active;
-extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
+extern int xfs_errortag_init(struct xfs_mount *mp);
+extern void xfs_errortag_del(struct xfs_mount *mp);
+extern bool xfs_errortag_test(struct xfs_mount *mp, const char *expression,
+		const char *file, int line, unsigned int error_tag);
+#define XFS_TEST_ERROR(expr, mp, tag)		\
+	((expr) || xfs_errortag_test((mp), #expr, __FILE__, __LINE__, (tag)))
 
-#define	XFS_NUM_INJECT_ERROR				10
-#define XFS_TEST_ERROR(expr, mp, tag, rf)		\
-	((expr) || (xfs_error_test_active && \
-	 xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \
-			(rf))))
-
-extern int xfs_errortag_add(unsigned int error_tag, struct xfs_mount *mp);
-extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud);
+extern int xfs_errortag_get(struct xfs_mount *mp, unsigned int error_tag);
+extern int xfs_errortag_set(struct xfs_mount *mp, unsigned int error_tag,
+		unsigned int tag_value);
+extern int xfs_errortag_add(struct xfs_mount *mp, unsigned int error_tag);
+extern int xfs_errortag_clearall(struct xfs_mount *mp);
 #else
-#define XFS_TEST_ERROR(expr, mp, tag, rf)	(expr)
-#define xfs_errortag_add(tag, mp)		(ENOSYS)
-#define xfs_errortag_clearall(mp, loud)		(ENOSYS)
+#define xfs_errortag_init(mp)			(0)
+#define xfs_errortag_del(mp)
+#define XFS_TEST_ERROR(expr, mp, tag)		(expr)
+#define xfs_errortag_set(mp, tag, val)		(ENOSYS)
+#define xfs_errortag_add(mp, tag)		(ENOSYS)
+#define xfs_errortag_clearall(mp)		(ENOSYS)
 #endif /* DEBUG */
 
 /*

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 51dfae5..c4893e2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c

@@ -679,6 +679,7 @@ xfs_file_buffered_aio_write(
 		xfs_iunlock(ip, iolock);
 		eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
 		xfs_icache_free_eofblocks(ip->i_mount, &eofb);
+		xfs_icache_free_cowblocks(ip->i_mount, &eofb);
 		goto write_retry;
 	}
 
@@ -970,362 +971,7 @@ xfs_file_readdir(
 	 */
 	bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
 
-	return xfs_readdir(ip, ctx, bufsize);
-}
-
-/*
- * This type is designed to indicate the type of offset we would like
- * to search from page cache for xfs_seek_hole_data().
- */
-enum {
-	HOLE_OFF = 0,
-	DATA_OFF,
-};
-
-/*
- * Lookup the desired type of offset from the given page.
- *
- * On success, return true and the offset argument will point to the
- * start of the region that was found.  Otherwise this function will
- * return false and keep the offset argument unchanged.
- */
-STATIC bool
-xfs_lookup_buffer_offset(
-	struct page		*page,
-	loff_t			*offset,
-	unsigned int		type)
-{
-	loff_t			lastoff = page_offset(page);
-	bool			found = false;
-	struct buffer_head	*bh, *head;
-
-	bh = head = page_buffers(page);
-	do {
-		/*
-		 * Unwritten extents that have data in the page
-		 * cache covering them can be identified by the
-		 * BH_Unwritten state flag.  Pages with multiple
-		 * buffers might have a mix of holes, data and
-		 * unwritten extents - any buffer with valid
-		 * data in it should have BH_Uptodate flag set
-		 * on it.
-		 */
-		if (buffer_unwritten(bh) ||
-		    buffer_uptodate(bh)) {
-			if (type == DATA_OFF)
-				found = true;
-		} else {
-			if (type == HOLE_OFF)
-				found = true;
-		}
-
-		if (found) {
-			*offset = lastoff;
-			break;
-		}
-		lastoff += bh->b_size;
-	} while ((bh = bh->b_this_page) != head);
-
-	return found;
-}
-
-/*
- * This routine is called to find out and return a data or hole offset
- * from the page cache for unwritten extents according to the desired
- * type for xfs_seek_hole_data().
- *
- * The argument offset is used to tell where we start to search from the
- * page cache.  Map is used to figure out the end points of the range to
- * lookup pages.
- *
- * Return true if the desired type of offset was found, and the argument
- * offset is filled with that address.  Otherwise, return false and keep
- * offset unchanged.
- */
-STATIC bool
-xfs_find_get_desired_pgoff(
-	struct inode		*inode,
-	struct xfs_bmbt_irec	*map,
-	unsigned int		type,
-	loff_t			*offset)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	struct pagevec		pvec;
-	pgoff_t			index;
-	pgoff_t			end;
-	loff_t			endoff;
-	loff_t			startoff = *offset;
-	loff_t			lastoff = startoff;
-	bool			found = false;
-
-	pagevec_init(&pvec, 0);
-
-	index = startoff >> PAGE_SHIFT;
-	endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
-	end = (endoff - 1) >> PAGE_SHIFT;
-	do {
-		int		want;
-		unsigned	nr_pages;
-		unsigned int	i;
-
-		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
-		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
-					  want);
-		if (nr_pages == 0)
-			break;
-
-		for (i = 0; i < nr_pages; i++) {
-			struct page	*page = pvec.pages[i];
-			loff_t		b_offset;
-
-			/*
-			 * At this point, the page may be truncated or
-			 * invalidated (changing page->mapping to NULL),
-			 * or even swizzled back from swapper_space to tmpfs
-			 * file mapping. However, page->index will not change
-			 * because we have a reference on the page.
-			 *
-			 * If current page offset is beyond where we've ended,
-			 * we've found a hole.
-			 */
-			if (type == HOLE_OFF && lastoff < endoff &&
-			    lastoff < page_offset(pvec.pages[i])) {
-				found = true;
-				*offset = lastoff;
-				goto out;
-			}
-			/* Searching done if the page index is out of range. */
-			if (page->index > end)
-				goto out;
-
-			lock_page(page);
-			/*
-			 * Page truncated or invalidated(page->mapping == NULL).
-			 * We can freely skip it and proceed to check the next
-			 * page.
-			 */
-			if (unlikely(page->mapping != inode->i_mapping)) {
-				unlock_page(page);
-				continue;
-			}
-
-			if (!page_has_buffers(page)) {
-				unlock_page(page);
-				continue;
-			}
-
-			found = xfs_lookup_buffer_offset(page, &b_offset, type);
-			if (found) {
-				/*
-				 * The found offset may be less than the start
-				 * point to search if this is the first time to
-				 * come here.
-				 */
-				*offset = max_t(loff_t, startoff, b_offset);
-				unlock_page(page);
-				goto out;
-			}
-
-			/*
-			 * We either searching data but nothing was found, or
-			 * searching hole but found a data buffer.  In either
-			 * case, probably the next page contains the desired
-			 * things, update the last offset to it so.
-			 */
-			lastoff = page_offset(page) + PAGE_SIZE;
-			unlock_page(page);
-		}
-
-		/*
-		 * The number of returned pages less than our desired, search
-		 * done.
-		 */
-		if (nr_pages < want)
-			break;
-
-		index = pvec.pages[i - 1]->index + 1;
-		pagevec_release(&pvec);
-	} while (index <= end);
-
-	/* No page at lastoff and we are not done - we found a hole. */
-	if (type == HOLE_OFF && lastoff < endoff) {
-		*offset = lastoff;
-		found = true;
-	}
-out:
-	pagevec_release(&pvec);
-	return found;
-}
-
-/*
- * caller must lock inode with xfs_ilock_data_map_shared,
- * can we craft an appropriate ASSERT?
- *
- * end is because the VFS-level lseek interface is defined such that any
- * offset past i_size shall return -ENXIO, but we use this for quota code
- * which does not maintain i_size, and we want to SEEK_DATA past i_size.
- */
-loff_t
-__xfs_seek_hole_data(
-	struct inode		*inode,
-	loff_t			start,
-	loff_t			end,
-	int			whence)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	loff_t			uninitialized_var(offset);
-	xfs_fileoff_t		fsbno;
-	xfs_filblks_t		lastbno;
-	int			error;
-
-	if (start >= end) {
-		error = -ENXIO;
-		goto out_error;
-	}
-
-	/*
-	 * Try to read extents from the first block indicated
-	 * by fsbno to the end block of the file.
-	 */
-	fsbno = XFS_B_TO_FSBT(mp, start);
-	lastbno = XFS_B_TO_FSB(mp, end);
-
-	for (;;) {
-		struct xfs_bmbt_irec	map[2];
-		int			nmap = 2;
-		unsigned int		i;
-
-		error = xfs_bmapi_read(ip, fsbno, lastbno - fsbno, map, &nmap,
-				       XFS_BMAPI_ENTIRE);
-		if (error)
-			goto out_error;
-
-		/* No extents at given offset, must be beyond EOF */
-		if (nmap == 0) {
-			error = -ENXIO;
-			goto out_error;
-		}
-
-		for (i = 0; i < nmap; i++) {
-			offset = max_t(loff_t, start,
-				       XFS_FSB_TO_B(mp, map[i].br_startoff));
-
-			/* Landed in the hole we wanted? */
-			if (whence == SEEK_HOLE &&
-			    map[i].br_startblock == HOLESTARTBLOCK)
-				goto out;
-
-			/* Landed in the data extent we wanted? */
-			if (whence == SEEK_DATA &&
-			    (map[i].br_startblock == DELAYSTARTBLOCK ||
-			     (map[i].br_state == XFS_EXT_NORM &&
-			      !isnullstartblock(map[i].br_startblock))))
-				goto out;
-
-			/*
-			 * Landed in an unwritten extent, try to search
-			 * for hole or data from page cache.
-			 */
-			if (map[i].br_state == XFS_EXT_UNWRITTEN) {
-				if (xfs_find_get_desired_pgoff(inode, &map[i],
-				      whence == SEEK_HOLE ? HOLE_OFF : DATA_OFF,
-							&offset))
-					goto out;
-			}
-		}
-
-		/*
-		 * We only received one extent out of the two requested. This
-		 * means we've hit EOF and didn't find what we are looking for.
-		 */
-		if (nmap == 1) {
-			/*
-			 * If we were looking for a hole, set offset to
-			 * the end of the file (i.e., there is an implicit
-			 * hole at the end of any file).
-		 	 */
-			if (whence == SEEK_HOLE) {
-				offset = end;
-				break;
-			}
-			/*
-			 * If we were looking for data, it's nowhere to be found
-			 */
-			ASSERT(whence == SEEK_DATA);
-			error = -ENXIO;
-			goto out_error;
-		}
-
-		ASSERT(i > 1);
-
-		/*
-		 * Nothing was found, proceed to the next round of search
-		 * if the next reading offset is not at or beyond EOF.
-		 */
-		fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
-		start = XFS_FSB_TO_B(mp, fsbno);
-		if (start >= end) {
-			if (whence == SEEK_HOLE) {
-				offset = end;
-				break;
-			}
-			ASSERT(whence == SEEK_DATA);
-			error = -ENXIO;
-			goto out_error;
-		}
-	}
-
-out:
-	/*
-	 * If at this point we have found the hole we wanted, the returned
-	 * offset may be bigger than the file size as it may be aligned to
-	 * page boundary for unwritten extents.  We need to deal with this
-	 * situation in particular.
-	 */
-	if (whence == SEEK_HOLE)
-		offset = min_t(loff_t, offset, end);
-
-	return offset;
-
-out_error:
-	return error;
-}
-
-STATIC loff_t
-xfs_seek_hole_data(
-	struct file		*file,
-	loff_t			start,
-	int			whence)
-{
-	struct inode		*inode = file->f_mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	uint			lock;
-	loff_t			offset, end;
-	int			error = 0;
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return -EIO;
-
-	lock = xfs_ilock_data_map_shared(ip);
-
-	end = i_size_read(inode);
-	offset = __xfs_seek_hole_data(inode, start, end, whence);
-	if (offset < 0) {
-		error = offset;
-		goto out_unlock;
-	}
-
-	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
-
-out_unlock:
-	xfs_iunlock(ip, lock);
-
-	if (error)
-		return error;
-	return offset;
+	return xfs_readdir(NULL, ip, ctx, bufsize);
 }
 
 STATIC loff_t
@@ -1334,17 +980,25 @@ xfs_file_llseek(
 	loff_t		offset,
 	int		whence)
 {
+	struct inode		*inode = file->f_mapping->host;
+
+	if (XFS_FORCED_SHUTDOWN(XFS_I(inode)->i_mount))
+		return -EIO;
+
 	switch (whence) {
-	case SEEK_END:
-	case SEEK_CUR:
-	case SEEK_SET:
+	default:
 		return generic_file_llseek(file, offset, whence);
 	case SEEK_HOLE:
+		offset = iomap_seek_hole(inode, offset, &xfs_iomap_ops);
+		break;
 	case SEEK_DATA:
-		return xfs_seek_hole_data(file, offset, whence);
-	default:
-		return -EINVAL;
+		offset = iomap_seek_data(inode, offset, &xfs_iomap_ops);
+		break;
 	}
+
+	if (offset < 0)
+		return offset;
+	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
 }
 
 /*

diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 6ccaae9..8f22fc5 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c

@@ -602,7 +602,7 @@ xfs_growfs_data_private(
 	if (nagimax)
 		mp->m_maxagi = nagimax;
 	if (mp->m_sb.sb_imax_pct) {
-		__uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
+		uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
 		do_div(icount, 100);
 		mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
 	} else
@@ -793,17 +793,17 @@ xfs_fs_counts(
 int
 xfs_reserve_blocks(
 	xfs_mount_t             *mp,
-	__uint64_t              *inval,
+	uint64_t              *inval,
 	xfs_fsop_resblks_t      *outval)
 {
-	__int64_t		lcounter, delta;
-	__int64_t		fdblks_delta = 0;
-	__uint64_t		request;
-	__int64_t		free;
+	int64_t			lcounter, delta;
+	int64_t			fdblks_delta = 0;
+	uint64_t		request;
+	int64_t			free;
 	int			error = 0;
 
 	/* If inval is null, report current values and return */
-	if (inval == (__uint64_t *)NULL) {
+	if (inval == (uint64_t *)NULL) {
 		if (!outval)
 			return -EINVAL;
 		outval->resblks = mp->m_resblks;
@@ -904,7 +904,7 @@ xfs_reserve_blocks(
 int
 xfs_fs_goingdown(
 	xfs_mount_t	*mp,
-	__uint32_t	inflags)
+	uint32_t	inflags)
 {
 	switch (inflags) {
 	case XFS_FSOP_GOING_FLAGS_DEFAULT: {

diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index f349158..2954c13 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h

@@ -22,9 +22,9 @@ extern int xfs_fs_geometry(xfs_mount_t *mp, xfs_fsop_geom_t *geo, int nversion);
 extern int xfs_growfs_data(xfs_mount_t *mp, xfs_growfs_data_t *in);
 extern int xfs_growfs_log(xfs_mount_t *mp, xfs_growfs_log_t *in);
 extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
-extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
+extern int xfs_reserve_blocks(xfs_mount_t *mp, uint64_t *inval,
 				xfs_fsop_resblks_t *outval);
-extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
+extern int xfs_fs_goingdown(xfs_mount_t *mp, uint32_t inflags);
 
 extern int xfs_fs_reserve_ag_blocks(struct xfs_mount *mp);
 extern int xfs_fs_unreserve_ag_blocks(struct xfs_mount *mp);

diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index 687a4b0..3e1cc30 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c

@@ -47,4 +47,9 @@ xfs_param_t xfs_params = {
 
 struct xfs_globals xfs_globals = {
 	.log_recovery_delay	=	0,	/* no delay by default */
+#ifdef XFS_ASSERT_FATAL
+	.bug_on_assert		=	true,	/* assert failures BUG() */
+#else
+	.bug_on_assert		=	false,	/* assert failures WARN() */
+#endif
 };

diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index b9c12e1..0a9e698 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c

@@ -368,6 +368,11 @@ xfs_iget_cache_hit(
 	if (ip->i_flags & XFS_IRECLAIMABLE) {
 		trace_xfs_iget_reclaim(ip);
 
+		if (flags & XFS_IGET_INCORE) {
+			error = -EAGAIN;
+			goto out_error;
+		}
+
 		/*
 		 * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
 		 * from stomping over us while we recycle the inode.  We can't
@@ -432,7 +437,8 @@ xfs_iget_cache_hit(
 	if (lock_flags != 0)
 		xfs_ilock(ip, lock_flags);
 
-	xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
+	if (!(flags & XFS_IGET_INCORE))
+		xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
 	XFS_STATS_INC(mp, xs_ig_found);
 
 	return 0;
@@ -603,6 +609,10 @@ xfs_iget(
 			goto out_error_or_again;
 	} else {
 		rcu_read_unlock();
+		if (flags & XFS_IGET_INCORE) {
+			error = -ENOENT;
+			goto out_error_or_again;
+		}
 		XFS_STATS_INC(mp, xs_ig_missed);
 
 		error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
@@ -623,7 +633,7 @@ xfs_iget(
 	return 0;
 
 out_error_or_again:
-	if (error == -EAGAIN) {
+	if (!(flags & XFS_IGET_INCORE) && error == -EAGAIN) {
 		delay(1);
 		goto again;
 	}
@@ -632,6 +642,44 @@ xfs_iget(
 }
 
 /*
+ * "Is this a cached inode that's also allocated?"
+ *
+ * Look up an inode by number in the given file system.  If the inode is
+ * in cache and isn't in purgatory, return 0 and set *inuse to true if the
+ * inode is allocated and to false if it is not.  For all other cases (not
+ * in cache, being torn down, etc.), return a negative error code.
+ *
+ * The caller has to prevent inode allocation and freeing activity,
+ * presumably by locking the AGI buffer.   This is to ensure that an
+ * inode cannot transition from allocated to freed until the caller is
+ * ready to allow that.  If the inode is in an intermediate state (new,
+ * reclaimable, or being reclaimed), -EAGAIN will be returned; if the
+ * inode is not in the cache, -ENOENT will be returned.  The caller must
+ * deal with these scenarios appropriately.
+ *
+ * This is a specialized use case for the online scrubber; if you're
+ * reading this, you probably want xfs_iget.
+ */
+int
+xfs_icache_inode_is_allocated(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	xfs_ino_t		ino,
+	bool			*inuse)
+{
+	struct xfs_inode	*ip;
+	int			error;
+
+	error = xfs_iget(mp, tp, ino, XFS_IGET_INCORE, 0, &ip);
+	if (error)
+		return error;
+
+	*inuse = !!(VFS_I(ip)->i_mode);
+	IRELE(ip);
+	return 0;
+}
+
+/*
  * The inode lookup is done in batches to keep the amount of lock traffic and
  * radix tree lookups to a minimum. The batch size is a trade off between
  * lookup reduction and stack usage. This is in the reclaim path, so we can't

diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 9183f77..bff4d85 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h

@@ -47,6 +47,7 @@ struct xfs_eofblocks {
 #define XFS_IGET_CREATE		0x1
 #define XFS_IGET_UNTRUSTED	0x2
 #define XFS_IGET_DONTCACHE	0x4
+#define XFS_IGET_INCORE		0x8	/* don't read from disk or reinit */
 
 /*
  * flags for AG inode iterator
@@ -126,4 +127,7 @@ xfs_fs_eofblocks_from_user(
 	return 0;
 }
 
+int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp,
+				  xfs_ino_t ino, bool *inuse);
+
 #endif

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c0a1e84..ceef77c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c

@@ -632,7 +632,7 @@ __xfs_iflock(
 
 STATIC uint
 _xfs_dic2xflags(
-	__uint16_t		di_flags,
+	uint16_t		di_flags,
 	uint64_t		di_flags2,
 	bool			has_attr)
 {
@@ -855,8 +855,8 @@ xfs_ialloc(
 		inode->i_version = 1;
 		ip->i_d.di_flags2 = 0;
 		ip->i_d.di_cowextsize = 0;
-		ip->i_d.di_crtime.t_sec = (__int32_t)tv.tv_sec;
-		ip->i_d.di_crtime.t_nsec = (__int32_t)tv.tv_nsec;
+		ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec;
+		ip->i_d.di_crtime.t_nsec = (int32_t)tv.tv_nsec;
 	}
 
 
@@ -3489,7 +3489,7 @@ xfs_iflush_int(
 	dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
 
 	if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
-			       mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
+			       mp, XFS_ERRTAG_IFLUSH_1)) {
 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 			"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
 			__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
@@ -3499,7 +3499,7 @@ xfs_iflush_int(
 		if (XFS_TEST_ERROR(
 		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
 		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
-		    mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
+		    mp, XFS_ERRTAG_IFLUSH_3)) {
 			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 				"%s: Bad regular inode %Lu, ptr 0x%p",
 				__func__, ip->i_ino, ip);
@@ -3510,7 +3510,7 @@ xfs_iflush_int(
 		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
 		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
 		    (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
-		    mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
+		    mp, XFS_ERRTAG_IFLUSH_4)) {
 			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 				"%s: Bad directory inode %Lu, ptr 0x%p",
 				__func__, ip->i_ino, ip);
@@ -3518,8 +3518,7 @@ xfs_iflush_int(
 		}
 	}
 	if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
-				ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
-				XFS_RANDOM_IFLUSH_5)) {
+				ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 			"%s: detected corrupt incore inode %Lu, "
 			"total extents = %d, nblocks = %Ld, ptr 0x%p",
@@ -3529,7 +3528,7 @@ xfs_iflush_int(
 		goto corrupt_out;
 	}
 	if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
-				mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
+				mp, XFS_ERRTAG_IFLUSH_6)) {
 		xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
 			"%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
 			__func__, ip->i_ino, ip->i_d.di_forkoff, ip);

diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 10e89fc..0ee453d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h

@@ -192,8 +192,8 @@ static inline void
 xfs_set_projid(struct xfs_inode *ip,
 		prid_t projid)
 {
-	ip->i_d.di_projid_hi = (__uint16_t) (projid >> 16);
-	ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
+	ip->i_d.di_projid_hi = (uint16_t) (projid >> 16);
+	ip->i_d.di_projid_lo = (uint16_t) (projid & 0xffff);
 }
 
 static inline prid_t
@@ -445,9 +445,6 @@ int	xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
 		     xfs_fsize_t isize, bool *did_zeroing);
 int	xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count,
 		bool *did_zero);
-loff_t	__xfs_seek_hole_data(struct inode *inode, loff_t start,
-			     loff_t eof, int whence);
-
 
 /* from xfs_iops.c */
 extern void xfs_setup_inode(struct xfs_inode *ip);

diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 6190697..9c0c7a9 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c

@@ -120,8 +120,7 @@ xfs_find_handle(
 		handle.ha_fid.fid_pad = 0;
 		handle.ha_fid.fid_gen = inode->i_generation;
 		handle.ha_fid.fid_ino = ip->i_ino;
-
-		hsize = XFS_HSIZE(handle);
+		hsize = sizeof(xfs_handle_t);
 	}
 
 	error = -EFAULT;
@@ -444,8 +443,8 @@ xfs_attrmulti_attr_get(
 	struct inode		*inode,
 	unsigned char		*name,
 	unsigned char		__user *ubuf,
-	__uint32_t		*len,
-	__uint32_t		flags)
+	uint32_t		*len,
+	uint32_t		flags)
 {
 	unsigned char		*kbuf;
 	int			error = -EFAULT;
@@ -473,8 +472,8 @@ xfs_attrmulti_attr_set(
 	struct inode		*inode,
 	unsigned char		*name,
 	const unsigned char	__user *ubuf,
-	__uint32_t		len,
-	__uint32_t		flags)
+	uint32_t		len,
+	uint32_t		flags)
 {
 	unsigned char		*kbuf;
 	int			error;
@@ -499,7 +498,7 @@ int
 xfs_attrmulti_attr_remove(
 	struct inode		*inode,
 	unsigned char		*name,
-	__uint32_t		flags)
+	uint32_t		flags)
 {
 	int			error;
 
@@ -877,7 +876,7 @@ xfs_merge_ioc_xflags(
 
 STATIC unsigned int
 xfs_di2lxflags(
-	__uint16_t	di_flags)
+	uint16_t	di_flags)
 {
 	unsigned int	flags = 0;
 
@@ -1288,7 +1287,7 @@ xfs_ioctl_setattr_check_projid(
 	struct fsxattr		*fa)
 {
 	/* Disallow 32bit project ids if projid32bit feature is not enabled. */
-	if (fa->fsx_projid > (__uint16_t)-1 &&
+	if (fa->fsx_projid > (uint16_t)-1 &&
 	    !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
 		return -EINVAL;
 
@@ -1932,7 +1931,7 @@ xfs_file_ioctl(
 
 	case XFS_IOC_SET_RESBLKS: {
 		xfs_fsop_resblks_t inout;
-		__uint64_t	   in;
+		uint64_t	   in;
 
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
@@ -2018,12 +2017,12 @@ xfs_file_ioctl(
 	}
 
 	case XFS_IOC_GOINGDOWN: {
-		__uint32_t in;
+		uint32_t in;
 
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 
-		if (get_user(in, (__uint32_t __user *)arg))
+		if (get_user(in, (uint32_t __user *)arg))
 			return -EFAULT;
 
 		return xfs_fs_goingdown(mp, in);
@@ -2038,14 +2037,14 @@ xfs_file_ioctl(
 		if (copy_from_user(&in, arg, sizeof(in)))
 			return -EFAULT;
 
-		return xfs_errortag_add(in.errtag, mp);
+		return xfs_errortag_add(mp, in.errtag);
 	}
 
 	case XFS_IOC_ERROR_CLEARALL:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 
-		return xfs_errortag_clearall(mp, 1);
+		return xfs_errortag_clearall(mp);
 
 	case XFS_IOC_FREE_EOFBLOCKS: {
 		struct xfs_fs_eofblocks eofb;

diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index 8b52881..e86c3ea 100644
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h

@@ -48,22 +48,22 @@ xfs_attrmulti_attr_get(
 	struct inode		*inode,
 	unsigned char		*name,
 	unsigned char		__user *ubuf,
-	__uint32_t		*len,
-	__uint32_t		flags);
+	uint32_t		*len,
+	uint32_t		flags);
 
 extern int
 xfs_attrmulti_attr_set(
 	struct inode		*inode,
 	unsigned char		*name,
 	const unsigned char	__user *ubuf,
-	__uint32_t		len,
-	__uint32_t		flags);
+	uint32_t		len,
+	uint32_t		flags);
 
 extern int
 xfs_attrmulti_attr_remove(
 	struct inode		*inode,
 	unsigned char		*name,
-	__uint32_t		flags);
+	uint32_t		flags);
 
 extern struct dentry *
 xfs_handle_to_dentry(

diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
index b1bb454..5492bcf 100644
--- a/fs/xfs/xfs_ioctl32.h
+++ b/fs/xfs/xfs_ioctl32.h

@@ -112,9 +112,9 @@ typedef struct compat_xfs_fsop_handlereq {
 
 /* The bstat field in the swapext struct needs translation */
 typedef struct compat_xfs_swapext {
-	__int64_t		sx_version;	/* version */
-	__int64_t		sx_fdtarget;	/* fd of target file */
-	__int64_t		sx_fdtmp;	/* fd of tmp file */
+	int64_t			sx_version;	/* version */
+	int64_t			sx_fdtarget;	/* fd of target file */
+	int64_t			sx_fdtmp;	/* fd of tmp file */
 	xfs_off_t		sx_offset;	/* offset into file */
 	xfs_off_t		sx_length;	/* leng from offset */
 	char			sx_pad[16];	/* pad space, unused */

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 05dc87e..813394c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c

@@ -543,7 +543,7 @@ xfs_file_iomap_begin_delay(
 	if (unlikely(XFS_TEST_ERROR(
 	    (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
 	     XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
-	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
 		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
 		error = -EFSCORRUPTED;
 		goto out_unlock;
@@ -1119,7 +1119,7 @@ xfs_file_iomap_end_delalloc(
 	 * Behave as if the write failed if drop writes is enabled. Set the NEW
 	 * flag to force delalloc cleanup.
 	 */
-	if (xfs_mp_drop_writes(mp)) {
+	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) {
 		iomap->flags |= IOMAP_F_NEW;
 		written = 0;
 	}

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ebfc133..469c9fa 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c

@@ -190,12 +190,12 @@ xfs_generic_create(
 
 #ifdef CONFIG_XFS_POSIX_ACL
 	if (default_acl) {
-		error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+		error = __xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
 		if (error)
 			goto out_cleanup_inode;
 	}
 	if (acl) {
-		error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
+		error = __xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
 		if (error)
 			goto out_cleanup_inode;
 	}
@@ -460,7 +460,7 @@ xfs_vn_get_link(
 	if (!dentry)
 		return ERR_PTR(-ECHILD);
 
-	link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
+	link = kmalloc(XFS_SYMLINK_MAXLEN+1, GFP_KERNEL);
 	if (!link)
 		goto out_err;
 

diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 26d67ce..c393a2f 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c

@@ -31,7 +31,7 @@
 #include "xfs_trace.h"
 #include "xfs_icache.h"
 
-STATIC int
+int
 xfs_internal_inum(
 	xfs_mount_t	*mp,
 	xfs_ino_t	ino)

diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 6ea8b39..17e86e0 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h

@@ -96,4 +96,6 @@ xfs_inumbers(
 	void			__user *buffer, /* buffer with inode info */
 	inumbers_fmt_pf		formatter);
 
+int xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
+
 #endif	/* __XFS_ITABLE_H__ */

diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 2d167fe..9301c5a 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h

@@ -24,14 +24,6 @@
 /*
  * Kernel specific type declarations for XFS
  */
-typedef signed char		__int8_t;
-typedef unsigned char		__uint8_t;
-typedef signed short int	__int16_t;
-typedef unsigned short int	__uint16_t;
-typedef signed int		__int32_t;
-typedef unsigned int		__uint32_t;
-typedef signed long long int	__int64_t;
-typedef unsigned long long int	__uint64_t;
 
 typedef __s64			xfs_off_t;	/* <file offset> type */
 typedef unsigned long long	xfs_ino_t;	/* <inode> type */
@@ -151,7 +143,6 @@ typedef __u32			xfs_nlink_t;
 #define __return_address __builtin_return_address(0)
 
 #define XFS_PROJID_DEFAULT	0
-#define MAXPATHLEN	1024
 
 #define MIN(a,b)	(min(a,b))
 #define MAX(a,b)	(max(a,b))
@@ -186,22 +177,22 @@ extern struct xstats xfsstats;
  * are converting to the init_user_ns. The uid is later mapped to a particular
  * user namespace value when crossing the kernel/user boundary.
  */
-static inline __uint32_t xfs_kuid_to_uid(kuid_t uid)
+static inline uint32_t xfs_kuid_to_uid(kuid_t uid)
 {
 	return from_kuid(&init_user_ns, uid);
 }
 
-static inline kuid_t xfs_uid_to_kuid(__uint32_t uid)
+static inline kuid_t xfs_uid_to_kuid(uint32_t uid)
 {
 	return make_kuid(&init_user_ns, uid);
 }
 
-static inline __uint32_t xfs_kgid_to_gid(kgid_t gid)
+static inline uint32_t xfs_kgid_to_gid(kgid_t gid)
 {
 	return from_kgid(&init_user_ns, gid);
 }
 
-static inline kgid_t xfs_gid_to_kgid(__uint32_t gid)
+static inline kgid_t xfs_gid_to_kgid(uint32_t gid)
 {
 	return make_kgid(&init_user_ns, gid);
 }
@@ -231,14 +222,14 @@ static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
 
 #define do_mod(a, b)	xfs_do_mod(&(a), (b), sizeof(a))
 
-static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
+static inline uint64_t roundup_64(uint64_t x, uint32_t y)
 {
 	x += y - 1;
 	do_div(x, y);
 	return x * y;
 }
 
-static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
+static inline uint64_t howmany_64(uint64_t x, uint32_t y)
 {
 	x += y - 1;
 	do_div(x, y);

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3731f13..0053bcf 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c

@@ -434,7 +434,7 @@ xfs_log_reserve(
 	int		 	unit_bytes,
 	int		 	cnt,
 	struct xlog_ticket	**ticp,
-	__uint8_t	 	client,
+	uint8_t		 	client,
 	bool			permanent)
 {
 	struct xlog		*log = mp->m_log;
@@ -825,9 +825,9 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 		if (!error) {
 			/* the data section must be 32 bit size aligned */
 			struct {
-			    __uint16_t magic;
-			    __uint16_t pad1;
-			    __uint32_t pad2; /* may as well make it 64 bits */
+			    uint16_t magic;
+			    uint16_t pad1;
+			    uint32_t pad2; /* may as well make it 64 bits */
 			} magic = {
 				.magic = XLOG_UNMOUNT_TYPE,
 			};
@@ -1189,8 +1189,7 @@ xlog_iodone(xfs_buf_t *bp)
 	 * IOABORT state. The IOABORT state is only set in DEBUG mode to inject
 	 * CRC errors into log recovery.
 	 */
-	if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR,
-			   XFS_RANDOM_IODONE_IOERR) ||
+	if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR) ||
 	    iclog->ic_state & XLOG_STATE_IOABORT) {
 		if (iclog->ic_state & XLOG_STATE_IOABORT)
 			iclog->ic_state &= ~XLOG_STATE_IOABORT;
@@ -1665,7 +1664,7 @@ xlog_cksum(
 	char			*dp,
 	int			size)
 {
-	__uint32_t		crc;
+	uint32_t		crc;
 
 	/* first generate the crc for the record header ... */
 	crc = xfs_start_cksum_update((char *)rhead,
@@ -1828,7 +1827,7 @@ xlog_sync(
 		 */
 		dptr = (char *)&iclog->ic_header + count;
 		for (i = 0; i < split; i += BBSIZE) {
-			__uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
+			uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
 			if (++cycle == XLOG_HEADER_MAGIC_NUM)
 				cycle++;
 			*(__be32 *)dptr = cpu_to_be32(cycle);
@@ -1842,7 +1841,6 @@ xlog_sync(
 	/* calculcate the checksum */
 	iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
 					    iclog->ic_datap, size);
-#ifdef DEBUG
 	/*
 	 * Intentionally corrupt the log record CRC based on the error injection
 	 * frequency, if defined. This facilitates testing log recovery in the
@@ -1850,15 +1848,13 @@ xlog_sync(
 	 * write on I/O completion and shutdown the fs. The subsequent mount
 	 * detects the bad CRC and attempts to recover.
 	 */
-	if (log->l_badcrc_factor &&
-	    (prandom_u32() % log->l_badcrc_factor == 0)) {
+	if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
 		iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
 		iclog->ic_state |= XLOG_STATE_IOABORT;
 		xfs_warn(log->l_mp,
 	"Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
 			 be64_to_cpu(iclog->ic_header.h_lsn));
 	}
-#endif
 
 	bp->b_io_length = BTOBB(count);
 	bp->b_fspriv = iclog;
@@ -2024,7 +2020,7 @@ xlog_print_tic_res(
 	};
 #undef REG_TYPE_STR
 
-	xfs_warn(mp, "xlog_write: reservation summary:");
+	xfs_warn(mp, "ticket reservation summary:");
 	xfs_warn(mp, "  unit res    = %d bytes",
 		 ticket->t_unit_res);
 	xfs_warn(mp, "  current res = %d bytes",
@@ -2045,10 +2041,55 @@ xlog_print_tic_res(
 			    "bad-rtype" : res_type_str[r_type]),
 			    ticket->t_res_arr[i].r_len);
 	}
+}
 
-	xfs_alert_tag(mp, XFS_PTAG_LOGRES,
-		"xlog_write: reservation ran out. Need to up reservation");
-	xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
+/*
+ * Print a summary of the transaction.
+ */
+void
+xlog_print_trans(
+	struct xfs_trans		*tp)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_log_item_desc	*lidp;
+
+	/* dump core transaction and ticket info */
+	xfs_warn(mp, "transaction summary:");
+	xfs_warn(mp, "  flags	= 0x%x", tp->t_flags);
+
+	xlog_print_tic_res(mp, tp->t_ticket);
+
+	/* dump each log item */
+	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+		struct xfs_log_item	*lip = lidp->lid_item;
+		struct xfs_log_vec	*lv = lip->li_lv;
+		struct xfs_log_iovec	*vec;
+		int			i;
+
+		xfs_warn(mp, "log item: ");
+		xfs_warn(mp, "  type	= 0x%x", lip->li_type);
+		xfs_warn(mp, "  flags	= 0x%x", lip->li_flags);
+		if (!lv)
+			continue;
+		xfs_warn(mp, "  niovecs	= %d", lv->lv_niovecs);
+		xfs_warn(mp, "  size	= %d", lv->lv_size);
+		xfs_warn(mp, "  bytes	= %d", lv->lv_bytes);
+		xfs_warn(mp, "  buf len	= %d", lv->lv_buf_len);
+
+		/* dump each iovec for the log item */
+		vec = lv->lv_iovecp;
+		for (i = 0; i < lv->lv_niovecs; i++) {
+			int dumplen = min(vec->i_len, 32);
+
+			xfs_warn(mp, "  iovec[%d]", i);
+			xfs_warn(mp, "    type	= 0x%x", vec->i_type);
+			xfs_warn(mp, "    len	= %d", vec->i_len);
+			xfs_warn(mp, "    first %d bytes of iovec[%d]:", dumplen, i);
+			xfs_hex_dump(vec->i_addr, dumplen);
+
+			vec++;
+		}
+	}
 }
 
 /*
@@ -2321,8 +2362,12 @@ xlog_write(
 	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
 		ticket->t_curr_res -= sizeof(xlog_op_header_t);
 
-	if (ticket->t_curr_res < 0)
+	if (ticket->t_curr_res < 0) {
+		xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
+		     "ctx ticket reservation ran out. Need to up reservation");
 		xlog_print_tic_res(log->l_mp, ticket);
+		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
+	}
 
 	index = 0;
 	lv = log_vector;
@@ -2363,8 +2408,8 @@ xlog_write(
 			}
 
 			reg = &vecp[index];
-			ASSERT(reg->i_len % sizeof(__int32_t) == 0);
-			ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0);
+			ASSERT(reg->i_len % sizeof(int32_t) == 0);
+			ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
 
 			start_rec_copy = xlog_write_start_rec(ptr, ticket);
 			if (start_rec_copy) {
@@ -3143,7 +3188,7 @@ xlog_state_switch_iclogs(
 	/* Round up to next log-sunit */
 	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
 	    log->l_mp->m_sb.sb_logsunit > 1) {
-		__uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit);
+		uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit);
 		log->l_curr_block = roundup(log->l_curr_block, sunit_bb);
 	}
 
@@ -3771,7 +3816,7 @@ xlog_verify_iclog(
 	xlog_in_core_2_t	*xhdr;
 	void			*base_ptr, *ptr, *p;
 	ptrdiff_t		field_offset;
-	__uint8_t		clientid;
+	uint8_t			clientid;
 	int			len, i, j, k, op_len;
 	int			idx;
 

diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index cc5a9f1..bf21277 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h

@@ -159,7 +159,7 @@ int	  xfs_log_reserve(struct xfs_mount *mp,
 			  int		   length,
 			  int		   count,
 			  struct xlog_ticket **ticket,
-			  __uint8_t	   clientid,
+			  uint8_t		   clientid,
 			  bool		   permanent);
 int	  xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
 void      xfs_log_unmount(struct xfs_mount *mp);

diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 82f1cbc..fbe72b1 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c

@@ -410,6 +410,7 @@ xlog_cil_insert_items(
 	int			len = 0;
 	int			diff_iovecs = 0;
 	int			iclog_space;
+	int			iovhdr_res = 0, split_res = 0, ctx_res = 0;
 
 	ASSERT(tp);
 
@@ -419,12 +420,65 @@ xlog_cil_insert_items(
 	 */
 	xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);
 
+	spin_lock(&cil->xc_cil_lock);
+
+	/* account for space used by new iovec headers  */
+	iovhdr_res = diff_iovecs * sizeof(xlog_op_header_t);
+	len += iovhdr_res;
+	ctx->nvecs += diff_iovecs;
+
+	/* attach the transaction to the CIL if it has any busy extents */
+	if (!list_empty(&tp->t_busy))
+		list_splice_init(&tp->t_busy, &ctx->busy_extents);
+
+	/*
+	 * Now transfer enough transaction reservation to the context ticket
+	 * for the checkpoint. The context ticket is special - the unit
+	 * reservation has to grow as well as the current reservation as we
+	 * steal from tickets so we can correctly determine the space used
+	 * during the transaction commit.
+	 */
+	if (ctx->ticket->t_curr_res == 0) {
+		ctx_res = ctx->ticket->t_unit_res;
+		ctx->ticket->t_curr_res = ctx_res;
+		tp->t_ticket->t_curr_res -= ctx_res;
+	}
+
+	/* do we need space for more log record headers? */
+	iclog_space = log->l_iclog_size - log->l_iclog_hsize;
+	if (len > 0 && (ctx->space_used / iclog_space !=
+				(ctx->space_used + len) / iclog_space)) {
+		split_res = (len + iclog_space - 1) / iclog_space;
+		/* need to take into account split region headers, too */
+		split_res *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
+		ctx->ticket->t_unit_res += split_res;
+		ctx->ticket->t_curr_res += split_res;
+		tp->t_ticket->t_curr_res -= split_res;
+		ASSERT(tp->t_ticket->t_curr_res >= len);
+	}
+	tp->t_ticket->t_curr_res -= len;
+	ctx->space_used += len;
+
+	/*
+	 * If we've overrun the reservation, dump the tx details before we move
+	 * the log items. Shutdown is imminent...
+	 */
+	if (WARN_ON(tp->t_ticket->t_curr_res < 0)) {
+		xfs_warn(log->l_mp, "Transaction log reservation overrun:");
+		xfs_warn(log->l_mp,
+			 "  log items: %d bytes (iov hdrs: %d bytes)",
+			 len, iovhdr_res);
+		xfs_warn(log->l_mp, "  split region headers: %d bytes",
+			 split_res);
+		xfs_warn(log->l_mp, "  ctx ticket: %d bytes", ctx_res);
+		xlog_print_trans(tp);
+	}
+
 	/*
 	 * Now (re-)position everything modified at the tail of the CIL.
 	 * We do this here so we only need to take the CIL lock once during
 	 * the transaction commit.
 	 */
-	spin_lock(&cil->xc_cil_lock);
 	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
 		struct xfs_log_item	*lip = lidp->lid_item;
 
@@ -441,44 +495,10 @@ xlog_cil_insert_items(
 			list_move_tail(&lip->li_cil, &cil->xc_cil);
 	}
 
-	/* account for space used by new iovec headers  */
-	len += diff_iovecs * sizeof(xlog_op_header_t);
-	ctx->nvecs += diff_iovecs;
-
-	/* attach the transaction to the CIL if it has any busy extents */
-	if (!list_empty(&tp->t_busy))
-		list_splice_init(&tp->t_busy, &ctx->busy_extents);
-
-	/*
-	 * Now transfer enough transaction reservation to the context ticket
-	 * for the checkpoint. The context ticket is special - the unit
-	 * reservation has to grow as well as the current reservation as we
-	 * steal from tickets so we can correctly determine the space used
-	 * during the transaction commit.
-	 */
-	if (ctx->ticket->t_curr_res == 0) {
-		ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
-		tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res;
-	}
-
-	/* do we need space for more log record headers? */
-	iclog_space = log->l_iclog_size - log->l_iclog_hsize;
-	if (len > 0 && (ctx->space_used / iclog_space !=
-				(ctx->space_used + len) / iclog_space)) {
-		int hdrs;
-
-		hdrs = (len + iclog_space - 1) / iclog_space;
-		/* need to take into account split region headers, too */
-		hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
-		ctx->ticket->t_unit_res += hdrs;
-		ctx->ticket->t_curr_res += hdrs;
-		tp->t_ticket->t_curr_res -= hdrs;
-		ASSERT(tp->t_ticket->t_curr_res >= len);
-	}
-	tp->t_ticket->t_curr_res -= len;
-	ctx->space_used += len;
-
 	spin_unlock(&cil->xc_cil_lock);
+
+	if (tp->t_ticket->t_curr_res < 0)
+		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
 }
 
 static void
@@ -973,6 +993,7 @@ xfs_log_commit_cil(
 {
 	struct xlog		*log = mp->m_log;
 	struct xfs_cil		*cil = log->l_cilp;
+	xfs_lsn_t		xc_commit_lsn;
 
 	/*
 	 * Do all necessary memory allocation before we lock the CIL.
@@ -986,13 +1007,9 @@ xfs_log_commit_cil(
 
 	xlog_cil_insert_items(log, tp);
 
-	/* check we didn't blow the reservation */
-	if (tp->t_ticket->t_curr_res < 0)
-		xlog_print_tic_res(mp, tp->t_ticket);
-
-	tp->t_commit_lsn = cil->xc_ctx->sequence;
+	xc_commit_lsn = cil->xc_ctx->sequence;
 	if (commit_lsn)
-		*commit_lsn = tp->t_commit_lsn;
+		*commit_lsn = xc_commit_lsn;
 
 	xfs_log_done(mp, tp->t_ticket, NULL, regrant);
 	xfs_trans_unreserve_and_mod_sb(tp);
@@ -1008,7 +1025,7 @@ xfs_log_commit_cil(
 	 * the log items. This affects (at least) processing of stale buffers,
 	 * inodes and EFIs.
 	 */
-	xfs_trans_free_items(tp, tp->t_commit_lsn, false);
+	xfs_trans_free_items(tp, xc_commit_lsn, false);
 
 	xlog_cil_push_background(log);
 

diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index c2604a5..51bf7b8 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h

@@ -419,7 +419,7 @@ struct xlog {
 };
 
 #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
-	((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE))
+	((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE))
 
 #define XLOG_FORCED_SHUTDOWN(log)	((log)->l_flags & XLOG_IO_ERROR)
 
@@ -456,6 +456,7 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
 }
 
 void	xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
+void	xlog_print_trans(struct xfs_trans *);
 int
 xlog_write(
 	struct xlog		*log,

diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8cec1e5..9549188 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c

@@ -2230,9 +2230,9 @@ xlog_recover_get_buf_lsn(
 	struct xfs_mount	*mp,
 	struct xfs_buf		*bp)
 {
-	__uint32_t		magic32;
-	__uint16_t		magic16;
-	__uint16_t		magicda;
+	uint32_t		magic32;
+	uint16_t		magic16;
+	uint16_t		magicda;
 	void			*blk = bp->b_addr;
 	uuid_t			*uuid;
 	xfs_lsn_t		lsn = -1;
@@ -2381,9 +2381,9 @@ xlog_recover_validate_buf_type(
 	xfs_lsn_t		current_lsn)
 {
 	struct xfs_da_blkinfo	*info = bp->b_addr;
-	__uint32_t		magic32;
-	__uint16_t		magic16;
-	__uint16_t		magicda;
+	uint32_t		magic32;
+	uint16_t		magic16;
+	uint16_t		magicda;
 	char			*warnmsg = NULL;
 
 	/*
@@ -2852,7 +2852,7 @@ xlog_recover_buffer_pass2(
 	if (XFS_DINODE_MAGIC ==
 	    be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
 	    (BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize,
-			(__uint32_t)log->l_mp->m_inode_cluster_size))) {
+			(uint32_t)log->l_mp->m_inode_cluster_size))) {
 		xfs_buf_stale(bp);
 		error = xfs_bwrite(bp);
 	} else {
@@ -3423,7 +3423,7 @@ xlog_recover_efd_pass2(
 	xfs_efd_log_format_t	*efd_formatp;
 	xfs_efi_log_item_t	*efip = NULL;
 	xfs_log_item_t		*lip;
-	__uint64_t		efi_id;
+	uint64_t		efi_id;
 	struct xfs_ail_cursor	cur;
 	struct xfs_ail		*ailp = log->l_ailp;
 
@@ -3519,7 +3519,7 @@ xlog_recover_rud_pass2(
 	struct xfs_rud_log_format	*rud_formatp;
 	struct xfs_rui_log_item		*ruip = NULL;
 	struct xfs_log_item		*lip;
-	__uint64_t			rui_id;
+	uint64_t			rui_id;
 	struct xfs_ail_cursor		cur;
 	struct xfs_ail			*ailp = log->l_ailp;
 
@@ -3635,7 +3635,7 @@ xlog_recover_cud_pass2(
 	struct xfs_cud_log_format	*cud_formatp;
 	struct xfs_cui_log_item		*cuip = NULL;
 	struct xfs_log_item		*lip;
-	__uint64_t			cui_id;
+	uint64_t			cui_id;
 	struct xfs_ail_cursor		cur;
 	struct xfs_ail			*ailp = log->l_ailp;
 
@@ -3754,7 +3754,7 @@ xlog_recover_bud_pass2(
 	struct xfs_bud_log_format	*bud_formatp;
 	struct xfs_bui_log_item		*buip = NULL;
 	struct xfs_log_item		*lip;
-	__uint64_t			bui_id;
+	uint64_t			bui_id;
 	struct xfs_ail_cursor		cur;
 	struct xfs_ail			*ailp = log->l_ailp;
 
@@ -4152,7 +4152,7 @@ xlog_recover_commit_trans(
 
 	#define XLOG_RECOVER_COMMIT_QUEUE_MAX 100
 
-	hlist_del(&trans->r_list);
+	hlist_del_init(&trans->r_list);
 
 	error = xlog_recover_reorder_trans(log, trans, pass);
 	if (error)
@@ -4354,6 +4354,8 @@ xlog_recover_free_trans(
 	xlog_recover_item_t	*item, *n;
 	int			i;
 
+	hlist_del_init(&trans->r_list);
+
 	list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) {
 		/* Free the regions in the item. */
 		list_del(&item->ri_list);
@@ -5224,12 +5226,16 @@ xlog_do_recovery_pass(
 	int			error2 = 0;
 	int			bblks, split_bblks;
 	int			hblks, split_hblks, wrapped_hblks;
+	int			i;
 	struct hlist_head	rhash[XLOG_RHASH_SIZE];
 	LIST_HEAD		(buffer_list);
 
 	ASSERT(head_blk != tail_blk);
 	rhead_blk = 0;
 
+	for (i = 0; i < XLOG_RHASH_SIZE; i++)
+		INIT_HLIST_HEAD(&rhash[i]);
+
 	/*
 	 * Read the header of the tail block and get the iclog buffer size from
 	 * h_size.  Use this to tell how many sectors make up the log header.
@@ -5466,6 +5472,19 @@ xlog_do_recovery_pass(
 	if (error && first_bad)
 		*first_bad = rhead_blk;
 
+	/*
+	 * Transactions are freed at commit time but transactions without commit
+	 * records on disk are never committed. Free any that may be left in the
+	 * hash table.
+	 */
+	for (i = 0; i < XLOG_RHASH_SIZE; i++) {
+		struct hlist_node	*tmp;
+		struct xlog_recover	*trans;
+
+		hlist_for_each_entry_safe(trans, tmp, &rhash[i], r_list)
+			xlog_recover_free_trans(trans);
+	}
+
 	return error ? error : error2;
 }
 
@@ -5772,9 +5791,9 @@ xlog_recover_check_summary(
 	xfs_buf_t	*agfbp;
 	xfs_buf_t	*agibp;
 	xfs_agnumber_t	agno;
-	__uint64_t	freeblks;
-	__uint64_t	itotal;
-	__uint64_t	ifree;
+	uint64_t	freeblks;
+	uint64_t	itotal;
+	uint64_t	ifree;
 	int		error;
 
 	mp = log->l_mp;

diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index 11792d8..e68bd10 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c

@@ -110,7 +110,10 @@ assfail(char *expr, char *file, int line)
 {
 	xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
 		expr, file, line);
-	BUG();
+	if (xfs_globals.bug_on_assert)
+		BUG();
+	else
+		WARN_ON(1);
 }
 
 void

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 43d07f9..40d4e8b 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c

@@ -173,7 +173,7 @@ xfs_free_perag(
 int
 xfs_sb_validate_fsb_count(
 	xfs_sb_t	*sbp,
-	__uint64_t	nblocks)
+	uint64_t	nblocks)
 {
 	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
 	ASSERT(sbp->sb_blocklog >= BBSHIFT);
@@ -435,7 +435,7 @@ STATIC void
 xfs_set_maxicount(xfs_mount_t *mp)
 {
 	xfs_sb_t	*sbp = &(mp->m_sb);
-	__uint64_t	icount;
+	uint64_t	icount;
 
 	if (sbp->sb_imax_pct) {
 		/*
@@ -501,7 +501,7 @@ xfs_set_low_space_thresholds(
 	int i;
 
 	for (i = 0; i < XFS_LOWSP_MAX; i++) {
-		__uint64_t space = mp->m_sb.sb_dblocks;
+		uint64_t space = mp->m_sb.sb_dblocks;
 
 		do_div(space, 100);
 		mp->m_low_space[i] = space * (i + 1);
@@ -597,10 +597,10 @@ xfs_mount_reset_sbqflags(
 	return xfs_sync_sb(mp, false);
 }
 
-__uint64_t
+uint64_t
 xfs_default_resblks(xfs_mount_t *mp)
 {
-	__uint64_t resblks;
+	uint64_t resblks;
 
 	/*
 	 * We default to 5% or 8192 fsbs of space reserved, whichever is
@@ -611,7 +611,7 @@ xfs_default_resblks(xfs_mount_t *mp)
 	 */
 	resblks = mp->m_sb.sb_dblocks;
 	do_div(resblks, 20);
-	resblks = min_t(__uint64_t, resblks, 8192);
+	resblks = min_t(uint64_t, resblks, 8192);
 	return resblks;
 }
 
@@ -631,7 +631,7 @@ xfs_mountfs(
 {
 	struct xfs_sb		*sbp = &(mp->m_sb);
 	struct xfs_inode	*rip;
-	__uint64_t		resblks;
+	uint64_t		resblks;
 	uint			quotamount = 0;
 	uint			quotaflags = 0;
 	int			error = 0;
@@ -719,10 +719,13 @@ xfs_mountfs(
 	if (error)
 		goto out_del_stats;
 
+	error = xfs_errortag_init(mp);
+	if (error)
+		goto out_remove_error_sysfs;
 
 	error = xfs_uuid_mount(mp);
 	if (error)
-		goto out_remove_error_sysfs;
+		goto out_remove_errortag;
 
 	/*
 	 * Set the minimum read and write sizes
@@ -1044,6 +1047,8 @@ xfs_mountfs(
 	xfs_da_unmount(mp);
  out_remove_uuid:
 	xfs_uuid_unmount(mp);
+ out_remove_errortag:
+	xfs_errortag_del(mp);
  out_remove_error_sysfs:
 	xfs_error_sysfs_del(mp);
  out_del_stats:
@@ -1062,7 +1067,7 @@ void
 xfs_unmountfs(
 	struct xfs_mount	*mp)
 {
-	__uint64_t		resblks;
+	uint64_t		resblks;
 	int			error;
 
 	cancel_delayed_work_sync(&mp->m_eofblocks_work);
@@ -1147,10 +1152,11 @@ xfs_unmountfs(
 	xfs_uuid_unmount(mp);
 
 #if defined(DEBUG)
-	xfs_errortag_clearall(mp, 0);
+	xfs_errortag_clearall(mp);
 #endif
 	xfs_free_perag(mp);
 
+	xfs_errortag_del(mp);
 	xfs_error_sysfs_del(mp);
 	xfs_sysfs_del(&mp->m_stats.xs_kobj);
 	xfs_sysfs_del(&mp->m_kobj);

diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 9fa312a..e0792d0 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h

@@ -108,10 +108,10 @@ typedef struct xfs_mount {
 	xfs_buftarg_t		*m_ddev_targp;	/* saves taking the address */
 	xfs_buftarg_t		*m_logdev_targp;/* ptr to log device */
 	xfs_buftarg_t		*m_rtdev_targp;	/* ptr to rt device */
-	__uint8_t		m_blkbit_log;	/* blocklog + NBBY */
-	__uint8_t		m_blkbb_log;	/* blocklog - BBSHIFT */
-	__uint8_t		m_agno_log;	/* log #ag's */
-	__uint8_t		m_agino_log;	/* #bits for agino in inum */
+	uint8_t			m_blkbit_log;	/* blocklog + NBBY */
+	uint8_t			m_blkbb_log;	/* blocklog - BBSHIFT */
+	uint8_t			m_agno_log;	/* log #ag's */
+	uint8_t			m_agino_log;	/* #bits for agino in inum */
 	uint			m_inode_cluster_size;/* min inode buf size */
 	uint			m_blockmask;	/* sb_blocksize-1 */
 	uint			m_blockwsize;	/* sb_blocksize in words */
@@ -139,7 +139,7 @@ typedef struct xfs_mount {
 	struct mutex		m_growlock;	/* growfs mutex */
 	int			m_fixedfsid[2];	/* unchanged for life of FS */
 	uint			m_dmevmask;	/* DMI events for this FS */
-	__uint64_t		m_flags;	/* global mount flags */
+	uint64_t		m_flags;	/* global mount flags */
 	bool			m_inotbt_nores; /* no per-AG finobt resv. */
 	int			m_ialloc_inos;	/* inodes in inode allocation */
 	int			m_ialloc_blks;	/* blocks in inode allocation */
@@ -148,14 +148,14 @@ typedef struct xfs_mount {
 	int			m_inoalign_mask;/* mask sb_inoalignmt if used */
 	uint			m_qflags;	/* quota status flags */
 	struct xfs_trans_resv	m_resv;		/* precomputed res values */
-	__uint64_t		m_maxicount;	/* maximum inode count */
-	__uint64_t		m_resblks;	/* total reserved blocks */
-	__uint64_t		m_resblks_avail;/* available reserved blocks */
-	__uint64_t		m_resblks_save;	/* reserved blks @ remount,ro */
+	uint64_t		m_maxicount;	/* maximum inode count */
+	uint64_t		m_resblks;	/* total reserved blocks */
+	uint64_t		m_resblks_avail;/* available reserved blocks */
+	uint64_t		m_resblks_save;	/* reserved blks @ remount,ro */
 	int			m_dalign;	/* stripe unit */
 	int			m_swidth;	/* stripe width */
 	int			m_sinoalign;	/* stripe unit inode alignment */
-	__uint8_t		m_sectbb_log;	/* sectlog - BBSHIFT */
+	uint8_t			m_sectbb_log;	/* sectlog - BBSHIFT */
 	const struct xfs_nameops *m_dirnameops;	/* vector of dir name ops */
 	const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */
 	const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
@@ -194,19 +194,17 @@ typedef struct xfs_mount {
 	 * ever support shrinks it would have to be persisted in addition
 	 * to various other kinds of pain inflicted on the pNFS server.
 	 */
-	__uint32_t		m_generation;
+	uint32_t		m_generation;
 
 	bool			m_fail_unmount;
 #ifdef DEBUG
 	/*
-	 * DEBUG mode instrumentation to test and/or trigger delayed allocation
-	 * block killing in the event of failed writes. When enabled, all
-	 * buffered writes are silenty dropped and handled as if they failed.
-	 * All delalloc blocks in the range of the write (including pre-existing
-	 * delalloc blocks!) are tossed as part of the write failure error
-	 * handling sequence.
+	 * Frequency with which errors are injected.  Replaces xfs_etest; the
+	 * value stored in here is the inverse of the frequency with which the
+	 * error triggers.  1 = always, 2 = half the time, etc.
 	 */
-	bool			m_drop_writes;
+	unsigned int		*m_errortag;
+	struct xfs_kobj		m_errortag_kobj;
 #endif
 } xfs_mount_t;
 
@@ -325,20 +323,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
 	return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
 }
 
-#ifdef DEBUG
-static inline bool
-xfs_mp_drop_writes(struct xfs_mount *mp)
-{
-	return mp->m_drop_writes;
-}
-#else
-static inline bool
-xfs_mp_drop_writes(struct xfs_mount *mp)
-{
-	return 0;
-}
-#endif
-
 /* per-AG block reservation data structures*/
 enum xfs_ag_resv_type {
 	XFS_AG_RESV_NONE = 0,
@@ -367,12 +351,12 @@ typedef struct xfs_perag {
 	char		pagi_init;	/* this agi's entry is initialized */
 	char		pagf_metadata;	/* the agf is preferred to be metadata */
 	char		pagi_inodeok;	/* The agi is ok for inodes */
-	__uint8_t	pagf_levels[XFS_BTNUM_AGF];
+	uint8_t		pagf_levels[XFS_BTNUM_AGF];
 					/* # of levels in bno & cnt btree */
-	__uint32_t	pagf_flcount;	/* count of blocks in freelist */
+	uint32_t	pagf_flcount;	/* count of blocks in freelist */
 	xfs_extlen_t	pagf_freeblks;	/* total free blocks */
 	xfs_extlen_t	pagf_longest;	/* longest free space */
-	__uint32_t	pagf_btreeblks;	/* # of blocks held in AGF btrees */
+	uint32_t	pagf_btreeblks;	/* # of blocks held in AGF btrees */
 	xfs_agino_t	pagi_freecount;	/* number of free inodes */
 	xfs_agino_t	pagi_count;	/* number of allocated inodes */
 
@@ -411,7 +395,7 @@ typedef struct xfs_perag {
 	struct xfs_ag_resv	pag_agfl_resv;
 
 	/* reference count */
-	__uint8_t		pagf_refcount_level;
+	uint8_t			pagf_refcount_level;
 } xfs_perag_t;
 
 static inline struct xfs_ag_resv *
@@ -434,7 +418,7 @@ void xfs_buf_hash_destroy(xfs_perag_t *pag);
 
 extern void	xfs_uuid_table_free(void);
 extern int	xfs_log_sbcount(xfs_mount_t *);
-extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
+extern uint64_t xfs_default_resblks(xfs_mount_t *mp);
 extern int	xfs_mountfs(xfs_mount_t *mp);
 extern int	xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
 				     xfs_agnumber_t *maxagi);
@@ -450,7 +434,7 @@ extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
 extern int	xfs_readsb(xfs_mount_t *, int);
 extern void	xfs_freesb(xfs_mount_t *);
 extern bool	xfs_fs_writable(struct xfs_mount *mp, int level);
-extern int	xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
+extern int	xfs_sb_validate_fsb_count(struct xfs_sb *, uint64_t);
 
 extern int	xfs_dev_is_read_only(struct xfs_mount *, char *);
 

diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 5fe6e70..6ce948c 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c

@@ -1247,6 +1247,7 @@ xfs_qm_flush_one(
 	struct xfs_dquot	*dqp,
 	void			*data)
 {
+	struct xfs_mount	*mp = dqp->q_mount;
 	struct list_head	*buffer_list = data;
 	struct xfs_buf		*bp = NULL;
 	int			error = 0;
@@ -1257,7 +1258,32 @@ xfs_qm_flush_one(
 	if (!XFS_DQ_IS_DIRTY(dqp))
 		goto out_unlock;
 
-	xfs_dqflock(dqp);
+	/*
+	 * The only way the dquot is already flush locked by the time quotacheck
+	 * gets here is if reclaim flushed it before the dqadjust walk dirtied
+	 * it for the final time. Quotacheck collects all dquot bufs in the
+	 * local delwri queue before dquots are dirtied, so reclaim can't have
+	 * possibly queued it for I/O. The only way out is to push the buffer to
+	 * cycle the flush lock.
+	 */
+	if (!xfs_dqflock_nowait(dqp)) {
+		/* buf is pinned in-core by delwri list */
+		DEFINE_SINGLE_BUF_MAP(map, dqp->q_blkno,
+				      mp->m_quotainfo->qi_dqchunklen);
+		bp = _xfs_buf_find(mp->m_ddev_targp, &map, 1, 0, NULL);
+		if (!bp) {
+			error = -EINVAL;
+			goto out_unlock;
+		}
+		xfs_buf_unlock(bp);
+
+		xfs_buf_delwri_pushbuf(bp, buffer_list);
+		xfs_buf_rele(bp);
+
+		error = -EAGAIN;
+		goto out_unlock;
+	}
+
 	error = xfs_qm_dqflush(dqp, &bp);
 	if (error)
 		goto out_unlock;

diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 3e52d5d..2be6d27 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c

@@ -33,7 +33,7 @@ xfs_fill_statvfs_from_dquot(
 	struct kstatfs		*statp,
 	struct xfs_dquot	*dqp)
 {
-	__uint64_t		limit;
+	uint64_t		limit;
 
 	limit = dqp->q_core.d_blk_softlimit ?
 		be64_to_cpu(dqp->q_core.d_blk_softlimit) :

diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index f82d79a..de94932 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c

@@ -269,7 +269,6 @@ xfs_fs_get_nextdqblk(
 	/* ID may be different, so convert back what we got */
 	*qid = make_kqid(current_user_ns(), qid->type, id);
 	return 0;
-	
 }
 
 STATIC int

diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index ffe6fe7..ab2270a 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c

@@ -155,6 +155,7 @@
 int
 xfs_reflink_find_shared(
 	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
 	xfs_agnumber_t		agno,
 	xfs_agblock_t		agbno,
 	xfs_extlen_t		aglen,
@@ -166,18 +167,18 @@ xfs_reflink_find_shared(
 	struct xfs_btree_cur	*cur;
 	int			error;
 
-	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
 	if (error)
 		return error;
 
-	cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
+	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);
 
 	error = xfs_refcount_find_shared(cur, agbno, aglen, fbno, flen,
 			find_end_of_shared);
 
 	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
 
-	xfs_buf_relse(agbp);
+	xfs_trans_brelse(tp, agbp);
 	return error;
 }
 
@@ -217,7 +218,7 @@ xfs_reflink_trim_around_shared(
 	agbno = XFS_FSB_TO_AGBNO(ip->i_mount, irec->br_startblock);
 	aglen = irec->br_blockcount;
 
-	error = xfs_reflink_find_shared(ip->i_mount, agno, agbno,
+	error = xfs_reflink_find_shared(ip->i_mount, NULL, agno, agbno,
 			aglen, &fbno, &flen, true);
 	if (error)
 		return error;
@@ -1373,8 +1374,8 @@ xfs_reflink_dirty_extents(
 			agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock);
 			aglen = map[1].br_blockcount;
 
-			error = xfs_reflink_find_shared(mp, agno, agbno, aglen,
-					&rbno, &rlen, true);
+			error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
+					aglen, &rbno, &rlen, true);
 			if (error)
 				goto out;
 			if (rbno == NULLAGBLOCK)
@@ -1405,56 +1406,72 @@ xfs_reflink_dirty_extents(
 	return error;
 }
 
+/* Does this inode need the reflink flag? */
+int
+xfs_reflink_inode_has_shared_extents(
+	struct xfs_trans		*tp,
+	struct xfs_inode		*ip,
+	bool				*has_shared)
+{
+	struct xfs_bmbt_irec		got;
+	struct xfs_mount		*mp = ip->i_mount;
+	struct xfs_ifork		*ifp;
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			agbno;
+	xfs_extlen_t			aglen;
+	xfs_agblock_t			rbno;
+	xfs_extlen_t			rlen;
+	xfs_extnum_t			idx;
+	bool				found;
+	int				error;
+
+	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
+		if (error)
+			return error;
+	}
+
+	*has_shared = false;
+	found = xfs_iext_lookup_extent(ip, ifp, 0, &idx, &got);
+	while (found) {
+		if (isnullstartblock(got.br_startblock) ||
+		    got.br_state != XFS_EXT_NORM)
+			goto next;
+		agno = XFS_FSB_TO_AGNO(mp, got.br_startblock);
+		agbno = XFS_FSB_TO_AGBNO(mp, got.br_startblock);
+		aglen = got.br_blockcount;
+
+		error = xfs_reflink_find_shared(mp, tp, agno, agbno, aglen,
+				&rbno, &rlen, false);
+		if (error)
+			return error;
+		/* Is there still a shared block here? */
+		if (rbno != NULLAGBLOCK) {
+			*has_shared = true;
+			return 0;
+		}
+next:
+		found = xfs_iext_get_extent(ifp, ++idx, &got);
+	}
+
+	return 0;
+}
+
 /* Clear the inode reflink flag if there are no shared extents. */
 int
 xfs_reflink_clear_inode_flag(
 	struct xfs_inode	*ip,
 	struct xfs_trans	**tpp)
 {
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		fbno;
-	xfs_filblks_t		end;
-	xfs_agnumber_t		agno;
-	xfs_agblock_t		agbno;
-	xfs_extlen_t		aglen;
-	xfs_agblock_t		rbno;
-	xfs_extlen_t		rlen;
-	struct xfs_bmbt_irec	map;
-	int			nmaps;
+	bool			needs_flag;
 	int			error = 0;
 
 	ASSERT(xfs_is_reflink_inode(ip));
 
-	fbno = 0;
-	end = XFS_B_TO_FSB(mp, i_size_read(VFS_I(ip)));
-	while (end - fbno > 0) {
-		nmaps = 1;
-		/*
-		 * Look for extents in the file.  Skip holes, delalloc, or
-		 * unwritten extents; they can't be reflinked.
-		 */
-		error = xfs_bmapi_read(ip, fbno, end - fbno, &map, &nmaps, 0);
-		if (error)
-			return error;
-		if (nmaps == 0)
-			break;
-		if (!xfs_bmap_is_real_extent(&map))
-			goto next;
-
-		agno = XFS_FSB_TO_AGNO(mp, map.br_startblock);
-		agbno = XFS_FSB_TO_AGBNO(mp, map.br_startblock);
-		aglen = map.br_blockcount;
-
-		error = xfs_reflink_find_shared(mp, agno, agbno, aglen,
-				&rbno, &rlen, false);
-		if (error)
-			return error;
-		/* Is there still a shared block here? */
-		if (rbno != NULLAGBLOCK)
-			return 0;
-next:
-		fbno = map.br_startoff + map.br_blockcount;
-	}
+	error = xfs_reflink_inode_has_shared_extents(*tpp, ip, &needs_flag);
+	if (error || needs_flag)
+		return error;
 
 	/*
 	 * We didn't find any shared blocks so turn off the reflink flag.

diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index d29a796..701487b 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h

@@ -20,9 +20,9 @@
 #ifndef __XFS_REFLINK_H
 #define __XFS_REFLINK_H 1
 
-extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno,
-		xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno,
-		xfs_extlen_t *flen, bool find_maximal);
+extern int xfs_reflink_find_shared(struct xfs_mount *mp, struct xfs_trans *tp,
+		xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t aglen,
+		xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_maximal);
 extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
 		struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed);
 
@@ -47,6 +47,8 @@ extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
 extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
 extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
 		struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe);
+extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp,
+		struct xfs_inode *ip, bool *has_shared);
 extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
 		struct xfs_trans **tpp);
 extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,

diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index c57aa7f..9147219 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c

@@ -1256,13 +1256,13 @@ xfs_rtpick_extent(
 {
 	xfs_rtblock_t	b;		/* result block */
 	int		log2;		/* log of sequence number */
-	__uint64_t	resid;		/* residual after log removed */
-	__uint64_t	seq;		/* sequence number of file creation */
-	__uint64_t	*seqp;		/* pointer to seqno in inode */
+	uint64_t	resid;		/* residual after log removed */
+	uint64_t	seq;		/* sequence number of file creation */
+	uint64_t	*seqp;		/* pointer to seqno in inode */
 
 	ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
 
-	seqp = (__uint64_t *)&VFS_I(mp->m_rbmip)->i_atime;
+	seqp = (uint64_t *)&VFS_I(mp->m_rbmip)->i_atime;
 	if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) {
 		mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
 		*seqp = 0;

diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index f13133e..79defa7 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h

@@ -107,6 +107,8 @@ xfs_growfs_rt(
 /*
  * From xfs_rtbitmap.c
  */
+int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp,
+		  xfs_rtblock_t block, int issum, struct xfs_buf **bpp);
 int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp,
 		      xfs_rtblock_t start, xfs_extlen_t len, int val,
 		      xfs_rtblock_t *new, int *stat);
@@ -143,6 +145,7 @@ int xfs_rtalloc_query_all(struct xfs_trans *tp,
 # define xfs_growfs_rt(mp,in)                           (ENOSYS)
 # define xfs_rtalloc_query_range(t,l,h,f,p)             (ENOSYS)
 # define xfs_rtalloc_query_all(t,f,p)                   (ENOSYS)
+# define xfs_rtbuf_get(m,t,b,i,p)                       (ENOSYS)
 static inline int		/* error */
 xfs_rtmount_init(
 	xfs_mount_t	*mp)	/* file system mount structure */

diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index f11282c..056e12b 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c

@@ -33,9 +33,9 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
 {
 	int		i, j;
 	int		len = 0;
-	__uint64_t	xs_xstrat_bytes = 0;
-	__uint64_t	xs_write_bytes = 0;
-	__uint64_t	xs_read_bytes = 0;
+	uint64_t	xs_xstrat_bytes = 0;
+	uint64_t	xs_write_bytes = 0;
+	uint64_t	xs_read_bytes = 0;
 
 	static const struct xstats_entry {
 		char	*desc;
@@ -100,7 +100,7 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
 void xfs_stats_clearall(struct xfsstats __percpu *stats)
 {
 	int		c;
-	__uint32_t	vn_active;
+	uint32_t	vn_active;
 
 	xfs_notice(NULL, "Clearing xfsstats");
 	for_each_possible_cpu(c) {

diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 375840f..f64d0ae 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h

@@ -54,125 +54,125 @@ enum {
  */
 struct __xfsstats {
 # define XFSSTAT_END_EXTENT_ALLOC	4
-	__uint32_t		xs_allocx;
-	__uint32_t		xs_allocb;
-	__uint32_t		xs_freex;
-	__uint32_t		xs_freeb;
+	uint32_t		xs_allocx;
+	uint32_t		xs_allocb;
+	uint32_t		xs_freex;
+	uint32_t		xs_freeb;
 # define XFSSTAT_END_ALLOC_BTREE	(XFSSTAT_END_EXTENT_ALLOC+4)
-	__uint32_t		xs_abt_lookup;
-	__uint32_t		xs_abt_compare;
-	__uint32_t		xs_abt_insrec;
-	__uint32_t		xs_abt_delrec;
+	uint32_t		xs_abt_lookup;
+	uint32_t		xs_abt_compare;
+	uint32_t		xs_abt_insrec;
+	uint32_t		xs_abt_delrec;
 # define XFSSTAT_END_BLOCK_MAPPING	(XFSSTAT_END_ALLOC_BTREE+7)
-	__uint32_t		xs_blk_mapr;
-	__uint32_t		xs_blk_mapw;
-	__uint32_t		xs_blk_unmap;
-	__uint32_t		xs_add_exlist;
-	__uint32_t		xs_del_exlist;
-	__uint32_t		xs_look_exlist;
-	__uint32_t		xs_cmp_exlist;
+	uint32_t		xs_blk_mapr;
+	uint32_t		xs_blk_mapw;
+	uint32_t		xs_blk_unmap;
+	uint32_t		xs_add_exlist;
+	uint32_t		xs_del_exlist;
+	uint32_t		xs_look_exlist;
+	uint32_t		xs_cmp_exlist;
 # define XFSSTAT_END_BLOCK_MAP_BTREE	(XFSSTAT_END_BLOCK_MAPPING+4)
-	__uint32_t		xs_bmbt_lookup;
-	__uint32_t		xs_bmbt_compare;
-	__uint32_t		xs_bmbt_insrec;
-	__uint32_t		xs_bmbt_delrec;
+	uint32_t		xs_bmbt_lookup;
+	uint32_t		xs_bmbt_compare;
+	uint32_t		xs_bmbt_insrec;
+	uint32_t		xs_bmbt_delrec;
 # define XFSSTAT_END_DIRECTORY_OPS	(XFSSTAT_END_BLOCK_MAP_BTREE+4)
-	__uint32_t		xs_dir_lookup;
-	__uint32_t		xs_dir_create;
-	__uint32_t		xs_dir_remove;
-	__uint32_t		xs_dir_getdents;
+	uint32_t		xs_dir_lookup;
+	uint32_t		xs_dir_create;
+	uint32_t		xs_dir_remove;
+	uint32_t		xs_dir_getdents;
 # define XFSSTAT_END_TRANSACTIONS	(XFSSTAT_END_DIRECTORY_OPS+3)
-	__uint32_t		xs_trans_sync;
-	__uint32_t		xs_trans_async;
-	__uint32_t		xs_trans_empty;
+	uint32_t		xs_trans_sync;
+	uint32_t		xs_trans_async;
+	uint32_t		xs_trans_empty;
 # define XFSSTAT_END_INODE_OPS		(XFSSTAT_END_TRANSACTIONS+7)
-	__uint32_t		xs_ig_attempts;
-	__uint32_t		xs_ig_found;
-	__uint32_t		xs_ig_frecycle;
-	__uint32_t		xs_ig_missed;
-	__uint32_t		xs_ig_dup;
-	__uint32_t		xs_ig_reclaims;
-	__uint32_t		xs_ig_attrchg;
+	uint32_t		xs_ig_attempts;
+	uint32_t		xs_ig_found;
+	uint32_t		xs_ig_frecycle;
+	uint32_t		xs_ig_missed;
+	uint32_t		xs_ig_dup;
+	uint32_t		xs_ig_reclaims;
+	uint32_t		xs_ig_attrchg;
 # define XFSSTAT_END_LOG_OPS		(XFSSTAT_END_INODE_OPS+5)
-	__uint32_t		xs_log_writes;
-	__uint32_t		xs_log_blocks;
-	__uint32_t		xs_log_noiclogs;
-	__uint32_t		xs_log_force;
-	__uint32_t		xs_log_force_sleep;
+	uint32_t		xs_log_writes;
+	uint32_t		xs_log_blocks;
+	uint32_t		xs_log_noiclogs;
+	uint32_t		xs_log_force;
+	uint32_t		xs_log_force_sleep;
 # define XFSSTAT_END_TAIL_PUSHING	(XFSSTAT_END_LOG_OPS+10)
-	__uint32_t		xs_try_logspace;
-	__uint32_t		xs_sleep_logspace;
-	__uint32_t		xs_push_ail;
-	__uint32_t		xs_push_ail_success;
-	__uint32_t		xs_push_ail_pushbuf;
-	__uint32_t		xs_push_ail_pinned;
-	__uint32_t		xs_push_ail_locked;
-	__uint32_t		xs_push_ail_flushing;
-	__uint32_t		xs_push_ail_restarts;
-	__uint32_t		xs_push_ail_flush;
+	uint32_t		xs_try_logspace;
+	uint32_t		xs_sleep_logspace;
+	uint32_t		xs_push_ail;
+	uint32_t		xs_push_ail_success;
+	uint32_t		xs_push_ail_pushbuf;
+	uint32_t		xs_push_ail_pinned;
+	uint32_t		xs_push_ail_locked;
+	uint32_t		xs_push_ail_flushing;
+	uint32_t		xs_push_ail_restarts;
+	uint32_t		xs_push_ail_flush;
 # define XFSSTAT_END_WRITE_CONVERT	(XFSSTAT_END_TAIL_PUSHING+2)
-	__uint32_t		xs_xstrat_quick;
-	__uint32_t		xs_xstrat_split;
+	uint32_t		xs_xstrat_quick;
+	uint32_t		xs_xstrat_split;
 # define XFSSTAT_END_READ_WRITE_OPS	(XFSSTAT_END_WRITE_CONVERT+2)
-	__uint32_t		xs_write_calls;
-	__uint32_t		xs_read_calls;
+	uint32_t		xs_write_calls;
+	uint32_t		xs_read_calls;
 # define XFSSTAT_END_ATTRIBUTE_OPS	(XFSSTAT_END_READ_WRITE_OPS+4)
-	__uint32_t		xs_attr_get;
-	__uint32_t		xs_attr_set;
-	__uint32_t		xs_attr_remove;
-	__uint32_t		xs_attr_list;
+	uint32_t		xs_attr_get;
+	uint32_t		xs_attr_set;
+	uint32_t		xs_attr_remove;
+	uint32_t		xs_attr_list;
 # define XFSSTAT_END_INODE_CLUSTER	(XFSSTAT_END_ATTRIBUTE_OPS+3)
-	__uint32_t		xs_iflush_count;
-	__uint32_t		xs_icluster_flushcnt;
-	__uint32_t		xs_icluster_flushinode;
+	uint32_t		xs_iflush_count;
+	uint32_t		xs_icluster_flushcnt;
+	uint32_t		xs_icluster_flushinode;
 # define XFSSTAT_END_VNODE_OPS		(XFSSTAT_END_INODE_CLUSTER+8)
-	__uint32_t		vn_active;	/* # vnodes not on free lists */
-	__uint32_t		vn_alloc;	/* # times vn_alloc called */
-	__uint32_t		vn_get;		/* # times vn_get called */
-	__uint32_t		vn_hold;	/* # times vn_hold called */
-	__uint32_t		vn_rele;	/* # times vn_rele called */
-	__uint32_t		vn_reclaim;	/* # times vn_reclaim called */
-	__uint32_t		vn_remove;	/* # times vn_remove called */
-	__uint32_t		vn_free;	/* # times vn_free called */
+	uint32_t		vn_active;	/* # vnodes not on free lists */
+	uint32_t		vn_alloc;	/* # times vn_alloc called */
+	uint32_t		vn_get;		/* # times vn_get called */
+	uint32_t		vn_hold;	/* # times vn_hold called */
+	uint32_t		vn_rele;	/* # times vn_rele called */
+	uint32_t		vn_reclaim;	/* # times vn_reclaim called */
+	uint32_t		vn_remove;	/* # times vn_remove called */
+	uint32_t		vn_free;	/* # times vn_free called */
 #define XFSSTAT_END_BUF			(XFSSTAT_END_VNODE_OPS+9)
-	__uint32_t		xb_get;
-	__uint32_t		xb_create;
-	__uint32_t		xb_get_locked;
-	__uint32_t		xb_get_locked_waited;
-	__uint32_t		xb_busy_locked;
-	__uint32_t		xb_miss_locked;
-	__uint32_t		xb_page_retries;
-	__uint32_t		xb_page_found;
-	__uint32_t		xb_get_read;
+	uint32_t		xb_get;
+	uint32_t		xb_create;
+	uint32_t		xb_get_locked;
+	uint32_t		xb_get_locked_waited;
+	uint32_t		xb_busy_locked;
+	uint32_t		xb_miss_locked;
+	uint32_t		xb_page_retries;
+	uint32_t		xb_page_found;
+	uint32_t		xb_get_read;
 /* Version 2 btree counters */
 #define XFSSTAT_END_ABTB_V2		(XFSSTAT_END_BUF + __XBTS_MAX)
-	__uint32_t		xs_abtb_2[__XBTS_MAX];
+	uint32_t		xs_abtb_2[__XBTS_MAX];
 #define XFSSTAT_END_ABTC_V2		(XFSSTAT_END_ABTB_V2 + __XBTS_MAX)
-	__uint32_t		xs_abtc_2[__XBTS_MAX];
+	uint32_t		xs_abtc_2[__XBTS_MAX];
 #define XFSSTAT_END_BMBT_V2		(XFSSTAT_END_ABTC_V2 + __XBTS_MAX)
-	__uint32_t		xs_bmbt_2[__XBTS_MAX];
+	uint32_t		xs_bmbt_2[__XBTS_MAX];
 #define XFSSTAT_END_IBT_V2		(XFSSTAT_END_BMBT_V2 + __XBTS_MAX)
-	__uint32_t		xs_ibt_2[__XBTS_MAX];
+	uint32_t		xs_ibt_2[__XBTS_MAX];
 #define XFSSTAT_END_FIBT_V2		(XFSSTAT_END_IBT_V2 + __XBTS_MAX)
-	__uint32_t		xs_fibt_2[__XBTS_MAX];
+	uint32_t		xs_fibt_2[__XBTS_MAX];
 #define XFSSTAT_END_RMAP_V2		(XFSSTAT_END_FIBT_V2 + __XBTS_MAX)
-	__uint32_t		xs_rmap_2[__XBTS_MAX];
+	uint32_t		xs_rmap_2[__XBTS_MAX];
 #define XFSSTAT_END_REFCOUNT		(XFSSTAT_END_RMAP_V2 + __XBTS_MAX)
-	__uint32_t		xs_refcbt_2[__XBTS_MAX];
+	uint32_t		xs_refcbt_2[__XBTS_MAX];
 #define XFSSTAT_END_XQMSTAT		(XFSSTAT_END_REFCOUNT + 6)
-	__uint32_t		xs_qm_dqreclaims;
-	__uint32_t		xs_qm_dqreclaim_misses;
-	__uint32_t		xs_qm_dquot_dups;
-	__uint32_t		xs_qm_dqcachemisses;
-	__uint32_t		xs_qm_dqcachehits;
-	__uint32_t		xs_qm_dqwants;
+	uint32_t		xs_qm_dqreclaims;
+	uint32_t		xs_qm_dqreclaim_misses;
+	uint32_t		xs_qm_dquot_dups;
+	uint32_t		xs_qm_dqcachemisses;
+	uint32_t		xs_qm_dqcachehits;
+	uint32_t		xs_qm_dqwants;
 #define XFSSTAT_END_QM			(XFSSTAT_END_XQMSTAT+2)
-	__uint32_t		xs_qm_dquot;
-	__uint32_t		xs_qm_dquot_unused;
+	uint32_t		xs_qm_dquot;
+	uint32_t		xs_qm_dquot_unused;
 /* Extra precision counters */
-	__uint64_t		xs_xstrat_bytes;
-	__uint64_t		xs_write_bytes;
-	__uint64_t		xs_read_bytes;
+	uint64_t		xs_xstrat_bytes;
+	uint64_t		xs_write_bytes;
+	uint64_t		xs_read_bytes;
 };
 
 struct xfsstats {
@@ -186,7 +186,7 @@ struct xfsstats {
  * simple wrapper for getting the array index of s struct member offset
  */
 #define XFS_STATS_CALC_INDEX(member)	\
-	(offsetof(struct __xfsstats, member) / (int)sizeof(__uint32_t))
+	(offsetof(struct __xfsstats, member) / (int)sizeof(uint32_t))
 
 
 int xfs_stats_format(struct xfsstats __percpu *stats, char *buf);

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 97df4db..38aaacd 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c

@@ -196,7 +196,7 @@ xfs_parseargs(
 	int			dsunit = 0;
 	int			dswidth = 0;
 	int			iosize = 0;
-	__uint8_t		iosizelog = 0;
+	uint8_t			iosizelog = 0;
 
 	/*
 	 * set up the mount name first so all the errors will refer to the
@@ -556,7 +556,7 @@ xfs_showargs(
 
 	return 0;
 }
-static __uint64_t
+static uint64_t
 xfs_max_file_offset(
 	unsigned int		blockshift)
 {
@@ -587,7 +587,7 @@ xfs_max_file_offset(
 # endif
 #endif
 
-	return (((__uint64_t)pagefactor) << bitshift) - 1;
+	return (((uint64_t)pagefactor) << bitshift) - 1;
 }
 
 /*
@@ -622,7 +622,7 @@ xfs_set_inode_alloc(
 	 * the max inode percentage.  Used only for inode32.
 	 */
 	if (mp->m_maxicount) {
-		__uint64_t	icount;
+		uint64_t	icount;
 
 		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
 		do_div(icount, 100);
@@ -1088,12 +1088,12 @@ xfs_fs_statfs(
 	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
 	xfs_sb_t		*sbp = &mp->m_sb;
 	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
-	__uint64_t		fakeinos, id;
-	__uint64_t		icount;
-	__uint64_t		ifree;
-	__uint64_t		fdblocks;
+	uint64_t		fakeinos, id;
+	uint64_t		icount;
+	uint64_t		ifree;
+	uint64_t		fdblocks;
 	xfs_extlen_t		lsize;
-	__int64_t		ffree;
+	int64_t			ffree;
 
 	statp->f_type = XFS_SB_MAGIC;
 	statp->f_namelen = MAXNAMELEN - 1;
@@ -1116,7 +1116,7 @@ xfs_fs_statfs(
 	statp->f_bavail = statp->f_bfree;
 
 	fakeinos = statp->f_bfree << sbp->sb_inopblog;
-	statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
+	statp->f_files = MIN(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
 	if (mp->m_maxicount)
 		statp->f_files = min_t(typeof(statp->f_files),
 					statp->f_files,
@@ -1129,7 +1129,7 @@ xfs_fs_statfs(
 
 	/* make sure statp->f_ffree does not underflow */
 	ffree = statp->f_files - (icount - ifree);
-	statp->f_ffree = max_t(__int64_t, ffree, 0);
+	statp->f_ffree = max_t(int64_t, ffree, 0);
 
 
 	if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
@@ -1142,7 +1142,7 @@ xfs_fs_statfs(
 STATIC void
 xfs_save_resvblks(struct xfs_mount *mp)
 {
-	__uint64_t resblks = 0;
+	uint64_t resblks = 0;
 
 	mp->m_resblks_save = mp->m_resblks;
 	xfs_reserve_blocks(mp, &resblks, NULL);
@@ -1151,7 +1151,7 @@ xfs_save_resvblks(struct xfs_mount *mp)
 STATIC void
 xfs_restore_resvblks(struct xfs_mount *mp)
 {
-	__uint64_t resblks;
+	uint64_t resblks;
 
 	if (mp->m_resblks_save) {
 		resblks = mp->m_resblks_save;

diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index f2cb45e..12cd9cf7 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c

@@ -43,8 +43,8 @@
 #include "xfs_log.h"
 
 /* ----- Kernel only functions below ----- */
-STATIC int
-xfs_readlink_bmap(
+int
+xfs_readlink_bmap_ilocked(
 	struct xfs_inode	*ip,
 	char			*link)
 {
@@ -143,7 +143,7 @@ xfs_readlink(
 	if (!pathlen)
 		goto out;
 
-	if (pathlen < 0 || pathlen > MAXPATHLEN) {
+	if (pathlen < 0 || pathlen > XFS_SYMLINK_MAXLEN) {
 		xfs_alert(mp, "%s: inode (%llu) bad symlink length (%lld)",
 			 __func__, (unsigned long long) ip->i_ino,
 			 (long long) pathlen);
@@ -153,7 +153,7 @@ xfs_readlink(
 	}
 
 
-	error = xfs_readlink_bmap(ip, link);
+	error = xfs_readlink_bmap_ilocked(ip, link);
 
  out:
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -202,7 +202,7 @@ xfs_symlink(
 	 * Check component lengths of the target path name.
 	 */
 	pathlen = strlen(target_path);
-	if (pathlen >= MAXPATHLEN)      /* total string too long */
+	if (pathlen >= XFS_SYMLINK_MAXLEN)      /* total string too long */
 		return -ENAMETOOLONG;
 
 	udqp = gdqp = NULL;
@@ -559,7 +559,7 @@ xfs_inactive_symlink(
 		return 0;
 	}
 
-	if (pathlen < 0 || pathlen > MAXPATHLEN) {
+	if (pathlen < 0 || pathlen > XFS_SYMLINK_MAXLEN) {
 		xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)",
 			 __func__, (unsigned long long)ip->i_ino, pathlen);
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);

diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h
index e75245d..aeaee89 100644
--- a/fs/xfs/xfs_symlink.h
+++ b/fs/xfs/xfs_symlink.h

@@ -21,6 +21,7 @@
 
 int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
 		const char *target_path, umode_t mode, struct xfs_inode **ipp);
+int xfs_readlink_bmap_ilocked(struct xfs_inode *ip, char *link);
 int xfs_readlink(struct xfs_inode *ip, char *link);
 int xfs_inactive_symlink(struct xfs_inode *ip);
 

diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index 984a349..82afee0 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h

@@ -95,6 +95,7 @@ extern xfs_param_t	xfs_params;
 
 struct xfs_globals {
 	int	log_recovery_delay;	/* log recovery delay (secs) */
+	bool	bug_on_assert;		/* BUG() the kernel on assert failure */
 };
 extern struct xfs_globals	xfs_globals;
 

diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 80ac15f..8b2ccc2 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c

@@ -90,49 +90,7 @@ to_mp(struct kobject *kobject)
 	return container_of(kobj, struct xfs_mount, m_kobj);
 }
 
-#ifdef DEBUG
-
-STATIC ssize_t
-drop_writes_store(
-	struct kobject		*kobject,
-	const char		*buf,
-	size_t			count)
-{
-	struct xfs_mount	*mp = to_mp(kobject);
-	int			ret;
-	int			val;
-
-	ret = kstrtoint(buf, 0, &val);
-	if (ret)
-		return ret;
-
-	if (val == 1)
-		mp->m_drop_writes = true;
-	else if (val == 0)
-		mp->m_drop_writes = false;
-	else
-		return -EINVAL;
-
-	return count;
-}
-
-STATIC ssize_t
-drop_writes_show(
-	struct kobject		*kobject,
-	char			*buf)
-{
-	struct xfs_mount	*mp = to_mp(kobject);
-
-	return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_drop_writes ? 1 : 0);
-}
-XFS_SYSFS_ATTR_RW(drop_writes);
-
-#endif /* DEBUG */
-
 static struct attribute *xfs_mp_attrs[] = {
-#ifdef DEBUG
-	ATTR_LIST(drop_writes),
-#endif
 	NULL,
 };
 
@@ -146,6 +104,38 @@ struct kobj_type xfs_mp_ktype = {
 /* debug */
 
 STATIC ssize_t
+bug_on_assert_store(
+	struct kobject		*kobject,
+	const char		*buf,
+	size_t			count)
+{
+	int			ret;
+	int			val;
+
+	ret = kstrtoint(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	if (val == 1)
+		xfs_globals.bug_on_assert = true;
+	else if (val == 0)
+		xfs_globals.bug_on_assert = false;
+	else
+		return -EINVAL;
+
+	return count;
+}
+
+STATIC ssize_t
+bug_on_assert_show(
+	struct kobject		*kobject,
+	char			*buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bug_on_assert ? 1 : 0);
+}
+XFS_SYSFS_ATTR_RW(bug_on_assert);
+
+STATIC ssize_t
 log_recovery_delay_store(
 	struct kobject	*kobject,
 	const char	*buf,
@@ -176,6 +166,7 @@ log_recovery_delay_show(
 XFS_SYSFS_ATTR_RW(log_recovery_delay);
 
 static struct attribute *xfs_dbg_attrs[] = {
+	ATTR_LIST(bug_on_assert),
 	ATTR_LIST(log_recovery_delay),
 	NULL,
 };
@@ -314,47 +305,11 @@ write_grant_head_show(
 }
 XFS_SYSFS_ATTR_RO(write_grant_head);
 
-#ifdef DEBUG
-STATIC ssize_t
-log_badcrc_factor_store(
-	struct kobject	*kobject,
-	const char	*buf,
-	size_t		count)
-{
-	struct xlog	*log = to_xlog(kobject);
-	int		ret;
-	uint32_t	val;
-
-	ret = kstrtouint(buf, 0, &val);
-	if (ret)
-		return ret;
-
-	log->l_badcrc_factor = val;
-
-	return count;
-}
-
-STATIC ssize_t
-log_badcrc_factor_show(
-	struct kobject	*kobject,
-	char		*buf)
-{
-	struct xlog	*log = to_xlog(kobject);
-
-	return snprintf(buf, PAGE_SIZE, "%d\n", log->l_badcrc_factor);
-}
-
-XFS_SYSFS_ATTR_RW(log_badcrc_factor);
-#endif	/* DEBUG */
-
 static struct attribute *xfs_log_attrs[] = {
 	ATTR_LIST(log_head_lsn),
 	ATTR_LIST(log_tail_lsn),
 	ATTR_LIST(reserve_grant_head),
 	ATTR_LIST(write_grant_head),
-#ifdef DEBUG
-	ATTR_LIST(log_badcrc_factor),
-#endif
 	NULL,
 };
 

diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 7c5a165..bcc3cdf 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h

@@ -251,7 +251,7 @@ TRACE_EVENT(xfs_iext_insert,
 		  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
 		  (long)__entry->idx,
 		  __entry->startoff,
-		  (__int64_t)__entry->startblock,
+		  (int64_t)__entry->startblock,
 		  __entry->blockcount,
 		  __entry->state,
 		  (char *)__entry->caller_ip)
@@ -295,7 +295,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
 		  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
 		  (long)__entry->idx,
 		  __entry->startoff,
-		  (__int64_t)__entry->startblock,
+		  (int64_t)__entry->startblock,
 		  __entry->blockcount,
 		  __entry->state,
 		  (char *)__entry->caller_ip)
@@ -367,6 +367,7 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done);
 DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
 DEFINE_BUF_EVENT(xfs_buf_delwri_queued);
 DEFINE_BUF_EVENT(xfs_buf_delwri_split);
+DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf);
 DEFINE_BUF_EVENT(xfs_buf_get_uncached);
 DEFINE_BUF_EVENT(xfs_buf_item_relse);
 DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
@@ -1280,7 +1281,7 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
 		  __entry->count,
 		  __print_symbolic(__entry->type, XFS_IO_TYPES),
 		  __entry->startoff,
-		  (__int64_t)__entry->startblock,
+		  (int64_t)__entry->startblock,
 		  __entry->blockcount)
 )
 
@@ -1490,25 +1491,6 @@ TRACE_EVENT(xfs_extent_busy_trim,
 		  __entry->tlen)
 );
 
-TRACE_EVENT(xfs_trans_commit_lsn,
-	TP_PROTO(struct xfs_trans *trans),
-	TP_ARGS(trans),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(struct xfs_trans *, tp)
-		__field(xfs_lsn_t, lsn)
-	),
-	TP_fast_assign(
-		__entry->dev = trans->t_mountp->m_super->s_dev;
-		__entry->tp = trans;
-		__entry->lsn = trans->t_commit_lsn;
-	),
-	TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->tp,
-		  __entry->lsn)
-);
-
 TRACE_EVENT(xfs_agf,
 	TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
 		 unsigned long caller_ip),
@@ -2057,7 +2039,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
 	TP_ARGS(log, buf_f),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
-		__field(__int64_t, blkno)
+		__field(int64_t, blkno)
 		__field(unsigned short, len)
 		__field(unsigned short, flags)
 		__field(unsigned short, size)
@@ -2106,7 +2088,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
 		__field(int, fields)
 		__field(unsigned short, asize)
 		__field(unsigned short, dsize)
-		__field(__int64_t, blkno)
+		__field(int64_t, blkno)
 		__field(int, len)
 		__field(int, boffset)
 	),
@@ -3256,8 +3238,8 @@ DECLARE_EVENT_CLASS(xfs_fsmap_class,
 		__field(xfs_agnumber_t, agno)
 		__field(xfs_fsblock_t, bno)
 		__field(xfs_filblks_t, len)
-		__field(__uint64_t, owner)
-		__field(__uint64_t, offset)
+		__field(uint64_t, owner)
+		__field(uint64_t, offset)
 		__field(unsigned int, flags)
 	),
 	TP_fast_assign(
@@ -3297,9 +3279,9 @@ DECLARE_EVENT_CLASS(xfs_getfsmap_class,
 		__field(dev_t, keydev)
 		__field(xfs_daddr_t, block)
 		__field(xfs_daddr_t, len)
-		__field(__uint64_t, owner)
-		__field(__uint64_t, offset)
-		__field(__uint64_t, flags)
+		__field(uint64_t, owner)
+		__field(uint64_t, offset)
+		__field(uint64_t, flags)
 	),
 	TP_fast_assign(
 		__entry->dev = mp->m_super->s_dev;

diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index a07acbf..6bdad6f 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h

@@ -105,10 +105,6 @@ typedef struct xfs_trans {
 	unsigned int		t_rtx_res;	/* # of rt extents resvd */
 	unsigned int		t_rtx_res_used;	/* # of resvd rt extents used */
 	struct xlog_ticket	*t_ticket;	/* log mgr ticket */
-	xfs_lsn_t		t_lsn;		/* log seq num of start of
-						 * transaction. */
-	xfs_lsn_t		t_commit_lsn;	/* log seq num of end of
-						 * transaction. */
 	struct xfs_mount	*t_mountp;	/* ptr to fs mount struct */
 	struct xfs_dquot_acct   *t_dqinfo;	/* acctg info for dquots */
 	unsigned int		t_flags;	/* misc flags */
@@ -249,7 +245,7 @@ struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp,
 		struct xfs_rui_log_item *ruip);
 int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp,
 		struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type,
-		__uint64_t owner, int whichfork, xfs_fileoff_t startoff,
+		uint64_t owner, int whichfork, xfs_fileoff_t startoff,
 		xfs_fsblock_t startblock, xfs_filblks_t blockcount,
 		xfs_exntst_t state, struct xfs_btree_cur **pcur);
 
@@ -275,6 +271,6 @@ int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp,
 		struct xfs_bud_log_item *rudp, struct xfs_defer_ops *dfops,
 		enum xfs_bmap_intent_type type, struct xfs_inode *ip,
 		int whichfork, xfs_fileoff_t startoff, xfs_fsblock_t startblock,
-		xfs_filblks_t blockcount, xfs_exntst_t state);
+		xfs_filblks_t *blockcount, xfs_exntst_t state);
 
 #endif	/* __XFS_TRANS_H__ */

diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c
index 6408e7d..14543d9 100644
--- a/fs/xfs/xfs_trans_bmap.c
+++ b/fs/xfs/xfs_trans_bmap.c

@@ -63,7 +63,7 @@ xfs_trans_log_finish_bmap_update(
 	int				whichfork,
 	xfs_fileoff_t			startoff,
 	xfs_fsblock_t			startblock,
-	xfs_filblks_t			blockcount,
+	xfs_filblks_t			*blockcount,
 	xfs_exntst_t			state)
 {
 	int				error;
@@ -196,16 +196,23 @@ xfs_bmap_update_finish_item(
 	void				**state)
 {
 	struct xfs_bmap_intent		*bmap;
+	xfs_filblks_t			count;
 	int				error;
 
 	bmap = container_of(item, struct xfs_bmap_intent, bi_list);
+	count = bmap->bi_bmap.br_blockcount;
 	error = xfs_trans_log_finish_bmap_update(tp, done_item, dop,
 			bmap->bi_type,
 			bmap->bi_owner, bmap->bi_whichfork,
 			bmap->bi_bmap.br_startoff,
 			bmap->bi_bmap.br_startblock,
-			bmap->bi_bmap.br_blockcount,
+			&count,
 			bmap->bi_bmap.br_state);
+	if (!error && count > 0) {
+		ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
+		bmap->bi_bmap.br_blockcount = count;
+		return -EAGAIN;
+	}
 	kmem_free(bmap);
 	return error;
 }

diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 8ee29ca..86987d8 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c

@@ -356,6 +356,7 @@ xfs_trans_brelse(xfs_trans_t	*tp,
 		 xfs_buf_t	*bp)
 {
 	xfs_buf_log_item_t	*bip;
+	int			freed;
 
 	/*
 	 * Default to a normal brelse() call if the tp is NULL.
@@ -419,16 +420,22 @@ xfs_trans_brelse(xfs_trans_t	*tp,
 	/*
 	 * Drop our reference to the buf log item.
 	 */
-	atomic_dec(&bip->bli_refcount);
+	freed = atomic_dec_and_test(&bip->bli_refcount);
 
 	/*
-	 * If the buf item is not tracking data in the log, then
-	 * we must free it before releasing the buffer back to the
-	 * free pool.  Before releasing the buffer to the free pool,
-	 * clear the transaction pointer in b_fsprivate2 to dissolve
-	 * its relation to this transaction.
+	 * If the buf item is not tracking data in the log, then we must free it
+	 * before releasing the buffer back to the free pool.
+	 *
+	 * If the fs has shutdown and we dropped the last reference, it may fall
+	 * on us to release a (possibly dirty) bli if it never made it to the
+	 * AIL (e.g., the aborted unpin already happened and didn't release it
+	 * due to our reference). Since we're already shutdown and need xa_lock,
+	 * just force remove from the AIL and release the bli here.
 	 */
-	if (!xfs_buf_item_dirty(bip)) {
+	if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) {
+		xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR);
+		xfs_buf_item_relse(bp);
+	} else if (!xfs_buf_item_dirty(bip)) {
 /***
 		ASSERT(bp->b_pincount == 0);
 ***/

diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c
index 9ead064..9b577be 100644
--- a/fs/xfs/xfs_trans_rmap.c
+++ b/fs/xfs/xfs_trans_rmap.c

@@ -96,7 +96,7 @@ xfs_trans_log_finish_rmap_update(
 	struct xfs_trans		*tp,
 	struct xfs_rud_log_item		*rudp,
 	enum xfs_rmap_intent_type	type,
-	__uint64_t			owner,
+	uint64_t			owner,
 	int				whichfork,
 	xfs_fileoff_t			startoff,
 	xfs_fsblock_t			startblock,

diff --git a/include/acpi/acrestyp.h b/include/acpi/acrestyp.h
index 343dbdc..4f7f39a 100644
--- a/include/acpi/acrestyp.h
+++ b/include/acpi/acrestyp.h

@@ -377,6 +377,13 @@ struct acpi_resource_generic_register {
 	u64 address;
 };
 
+/* Generic Address Space Access Sizes */
+#define ACPI_ACCESS_SIZE_UNDEFINED		0
+#define ACPI_ACCESS_SIZE_BYTE			1
+#define ACPI_ACCESS_SIZE_WORD			2
+#define ACPI_ACCESS_SIZE_DWORD			3
+#define ACPI_ACCESS_SIZE_QWORD			4
+
 struct acpi_resource_gpio {
 	u8 revision_id;
 	u8 connection_type;

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index d6f4aed..8719135 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h

@@ -97,6 +97,7 @@ extern void warn_slowpath_null(const char *file, const int line);
 
 /* used internally by panic.c */
 struct warn_args;
+struct pt_regs;
 
 void __warn(const char *file, int line, void *caller, unsigned taint,
 	    struct pt_regs *regs, struct warn_args *args);

diff --git a/include/drm/bridge/dw_hdmi.h b/include/drm/bridge/dw_hdmi.h
index 4c8d4c8..182f832 100644
--- a/include/drm/bridge/dw_hdmi.h
+++ b/include/drm/bridge/dw_hdmi.h

@@ -22,56 +22,56 @@ struct dw_hdmi;
  * 48bit bus.
  *
  * +----------------------+----------------------------------+------------------------------+
- * + Format Name          + Format Code                      + Encodings                    +
+ * | Format Name          | Format Code                      | Encodings                    |
  * +----------------------+----------------------------------+------------------------------+
- * + RGB 4:4:4 8bit       + ``MEDIA_BUS_FMT_RGB888_1X24``    + ``V4L2_YCBCR_ENC_DEFAULT``   +
+ * | RGB 4:4:4 8bit       | ``MEDIA_BUS_FMT_RGB888_1X24``    | ``V4L2_YCBCR_ENC_DEFAULT``   |
  * +----------------------+----------------------------------+------------------------------+
- * + RGB 4:4:4 10bits     + ``MEDIA_BUS_FMT_RGB101010_1X30`` + ``V4L2_YCBCR_ENC_DEFAULT``   +
+ * | RGB 4:4:4 10bits     | ``MEDIA_BUS_FMT_RGB101010_1X30`` | ``V4L2_YCBCR_ENC_DEFAULT``   |
  * +----------------------+----------------------------------+------------------------------+
- * + RGB 4:4:4 12bits     + ``MEDIA_BUS_FMT_RGB121212_1X36`` + ``V4L2_YCBCR_ENC_DEFAULT``   +
+ * | RGB 4:4:4 12bits     | ``MEDIA_BUS_FMT_RGB121212_1X36`` | ``V4L2_YCBCR_ENC_DEFAULT``   |
  * +----------------------+----------------------------------+------------------------------+
- * + RGB 4:4:4 16bits     + ``MEDIA_BUS_FMT_RGB161616_1X48`` + ``V4L2_YCBCR_ENC_DEFAULT``   +
+ * | RGB 4:4:4 16bits     | ``MEDIA_BUS_FMT_RGB161616_1X48`` | ``V4L2_YCBCR_ENC_DEFAULT``   |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:4:4 8bit     + ``MEDIA_BUS_FMT_YUV8_1X24``      + ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_XV601``  +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_XV709``  +
+ * | YCbCr 4:4:4 8bit     | ``MEDIA_BUS_FMT_YUV8_1X24``      | ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_XV601``  |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_XV709``  |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:4:4 10bits   + ``MEDIA_BUS_FMT_YUV10_1X30``     + ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_XV601``  +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_XV709``  +
+ * | YCbCr 4:4:4 10bits   | ``MEDIA_BUS_FMT_YUV10_1X30``     | ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_XV601``  |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_XV709``  |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:4:4 12bits   + ``MEDIA_BUS_FMT_YUV12_1X36``     + ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_XV601``  +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_XV709``  +
+ * | YCbCr 4:4:4 12bits   | ``MEDIA_BUS_FMT_YUV12_1X36``     | ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_XV601``  |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_XV709``  |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:4:4 16bits   + ``MEDIA_BUS_FMT_YUV16_1X48``     + ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_XV601``  +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_XV709``  +
+ * | YCbCr 4:4:4 16bits   | ``MEDIA_BUS_FMT_YUV16_1X48``     | ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_XV601``  |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_XV709``  |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:2:2 8bit     + ``MEDIA_BUS_FMT_UYVY8_1X16``     + ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
+ * | YCbCr 4:2:2 8bit     | ``MEDIA_BUS_FMT_UYVY8_1X16``     | ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:2:2 10bits   + ``MEDIA_BUS_FMT_UYVY10_1X20``    + ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
+ * | YCbCr 4:2:2 10bits   | ``MEDIA_BUS_FMT_UYVY10_1X20``    | ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:2:2 12bits   + ``MEDIA_BUS_FMT_UYVY12_1X24``    + ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
+ * | YCbCr 4:2:2 12bits   | ``MEDIA_BUS_FMT_UYVY12_1X24``    | ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:2:0 8bit     + ``MEDIA_BUS_FMT_UYYVYY8_0_5X24`` + ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
+ * | YCbCr 4:2:0 8bit     | ``MEDIA_BUS_FMT_UYYVYY8_0_5X24`` | ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:2:0 10bits   + ``MEDIA_BUS_FMT_UYYVYY10_0_5X30``+ ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
+ * | YCbCr 4:2:0 10bits   | ``MEDIA_BUS_FMT_UYYVYY10_0_5X30``| ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:2:0 12bits   + ``MEDIA_BUS_FMT_UYYVYY12_0_5X36``+ ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
+ * | YCbCr 4:2:0 12bits   | ``MEDIA_BUS_FMT_UYYVYY12_0_5X36``| ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
  * +----------------------+----------------------------------+------------------------------+
- * + YCbCr 4:2:0 16bits   + ``MEDIA_BUS_FMT_UYYVYY16_0_5X48``+ ``V4L2_YCBCR_ENC_601``       +
- * +                      +                                  + or ``V4L2_YCBCR_ENC_709``    +
+ * | YCbCr 4:2:0 16bits   | ``MEDIA_BUS_FMT_UYYVYY16_0_5X48``| ``V4L2_YCBCR_ENC_601``       |
+ * |                      |                                  | or ``V4L2_YCBCR_ENC_709``    |
  * +----------------------+----------------------------------+------------------------------+
  */
 

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 99f96df..c749eef 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h

@@ -57,6 +57,9 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev)
 	acpi_fwnode_handle(adev) : NULL)
 #define ACPI_HANDLE(dev)		acpi_device_handle(ACPI_COMPANION(dev))
 
+
+extern const struct fwnode_operations acpi_fwnode_ops;
+
 static inline struct fwnode_handle *acpi_alloc_fwnode_static(void)
 {
 	struct fwnode_handle *fwnode;
@@ -66,6 +69,7 @@ static inline struct fwnode_handle *acpi_alloc_fwnode_static(void)
 		return NULL;
 
 	fwnode->type = FWNODE_ACPI_STATIC;
+	fwnode->ops = &acpi_fwnode_ops;
 
 	return fwnode;
 }

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index ace73f9..854e1bd 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h

@@ -69,34 +69,14 @@ static inline void __add_wb_stat(struct bdi_writeback *wb,
 	percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
 }
 
-static inline void __inc_wb_stat(struct bdi_writeback *wb,
-				 enum wb_stat_item item)
+static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
 {
 	__add_wb_stat(wb, item, 1);
 }
 
-static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__inc_wb_stat(wb, item);
-	local_irq_restore(flags);
-}
-
-static inline void __dec_wb_stat(struct bdi_writeback *wb,
-				 enum wb_stat_item item)
-{
-	__add_wb_stat(wb, item, -1);
-}
-
 static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__dec_wb_stat(wb, item);
-	local_irq_restore(flags);
+	__add_wb_stat(wb, item, -1);
 }
 
 static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
@@ -104,22 +84,9 @@ static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
 	return percpu_counter_read_positive(&wb->stat[item]);
 }
 
-static inline s64 __wb_stat_sum(struct bdi_writeback *wb,
-				enum wb_stat_item item)
-{
-	return percpu_counter_sum_positive(&wb->stat[item]);
-}
-
 static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item)
 {
-	s64 sum;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	sum = __wb_stat_sum(wb, item);
-	local_irq_restore(flags);
-
-	return sum;
+	return percpu_counter_sum_positive(&wb->stat[item]);
 }
 
 extern void wb_writeout_inc(struct bdi_writeback *wb);

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 664a27d..7b1cf4b 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h

@@ -165,10 +165,27 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 {
 	iter->bi_sector += bytes >> 9;
 
-	if (bio_no_advance_iter(bio))
+	if (bio_no_advance_iter(bio)) {
 		iter->bi_size -= bytes;
-	else
+		iter->bi_done += bytes;
+	} else {
 		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
+		/* TODO: It is reasonable to complete bio with error here. */
+	}
+}
+
+static inline bool bio_rewind_iter(struct bio *bio, struct bvec_iter *iter,
+		unsigned int bytes)
+{
+	iter->bi_sector -= bytes >> 9;
+
+	if (bio_no_advance_iter(bio)) {
+		iter->bi_size += bytes;
+		iter->bi_done -= bytes;
+		return true;
+	}
+
+	return bvec_iter_rewind(bio->bi_io_vec, iter, bytes);
 }
 
 #define __bio_for_each_segment(bvl, bio, iter, start)			\
@@ -303,8 +320,6 @@ struct bio_integrity_payload {
 
 	struct bvec_iter	bip_iter;
 
-	bio_end_io_t		*bip_end_io;	/* saved I/O completion fn */
-
 	unsigned short		bip_slab;	/* slab the bip came from */
 	unsigned short		bip_vcnt;	/* # of integrity bio_vecs */
 	unsigned short		bip_max_vcnt;	/* integrity bio_vec slots */
@@ -722,13 +737,10 @@ struct biovec_slab {
 		bip_for_each_vec(_bvl, _bio->bi_integrity, _iter)
 
 extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
-extern void bio_integrity_free(struct bio *);
 extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
-extern bool bio_integrity_enabled(struct bio *bio);
-extern int bio_integrity_prep(struct bio *);
-extern void bio_integrity_endio(struct bio *);
+extern bool bio_integrity_prep(struct bio *);
 extern void bio_integrity_advance(struct bio *, unsigned int);
-extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
+extern void bio_integrity_trim(struct bio *);
 extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
 extern int bioset_integrity_create(struct bio_set *, int);
 extern void bioset_integrity_free(struct bio_set *);
@@ -741,11 +753,6 @@ static inline void *bio_integrity(struct bio *bio)
 	return NULL;
 }
 
-static inline bool bio_integrity_enabled(struct bio *bio)
-{
-	return false;
-}
-
 static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
 {
 	return 0;
@@ -756,14 +763,9 @@ static inline void bioset_integrity_free (struct bio_set *bs)
 	return;
 }
 
-static inline int bio_integrity_prep(struct bio *bio)
+static inline bool bio_integrity_prep(struct bio *bio)
 {
-	return 0;
-}
-
-static inline void bio_integrity_free(struct bio *bio)
-{
-	return;
+	return true;
 }
 
 static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
@@ -778,8 +780,7 @@ static inline void bio_integrity_advance(struct bio *bio,
 	return;
 }
 
-static inline void bio_integrity_trim(struct bio *bio, unsigned int offset,
-				      unsigned int sectors)
+static inline void bio_integrity_trim(struct bio *bio)
 {
 	return;
 }

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 3b77588..5797ca6 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h

@@ -112,9 +112,8 @@ extern int __bitmap_intersects(const unsigned long *bitmap1,
 extern int __bitmap_subset(const unsigned long *bitmap1,
 			const unsigned long *bitmap2, unsigned int nbits);
 extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
-
-extern void bitmap_set(unsigned long *map, unsigned int start, int len);
-extern void bitmap_clear(unsigned long *map, unsigned int start, int len);
+extern void __bitmap_set(unsigned long *map, unsigned int start, int len);
+extern void __bitmap_clear(unsigned long *map, unsigned int start, int len);
 
 extern unsigned long bitmap_find_next_zero_area_off(unsigned long *map,
 						    unsigned long size,
@@ -267,10 +266,8 @@ static inline int bitmap_equal(const unsigned long *src1,
 {
 	if (small_const_nbits(nbits))
 		return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
-#ifdef CONFIG_S390
-	if (__builtin_constant_p(nbits) && (nbits % BITS_PER_LONG) == 0)
+	if (__builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
 		return !memcmp(src1, src2, nbits / 8);
-#endif
 	return __bitmap_equal(src1, src2, nbits);
 }
 
@@ -315,6 +312,30 @@ static __always_inline int bitmap_weight(const unsigned long *src, unsigned int
 	return __bitmap_weight(src, nbits);
 }
 
+static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
+		unsigned int nbits)
+{
+	if (__builtin_constant_p(nbits) && nbits == 1)
+		__set_bit(start, map);
+	else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) &&
+		 __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
+		memset((char *)map + start / 8, 0xff, nbits / 8);
+	else
+		__bitmap_set(map, start, nbits);
+}
+
+static __always_inline void bitmap_clear(unsigned long *map, unsigned int start,
+		unsigned int nbits)
+{
+	if (__builtin_constant_p(nbits) && nbits == 1)
+		__clear_bit(start, map);
+	else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) &&
+		 __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8))
+		memset((char *)map + start / 8, 0, nbits / 8);
+	else
+		__bitmap_clear(map, start, nbits);
+}
+
 static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src,
 				unsigned int shift, int nbits)
 {

diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index e0abeba..c8dae555 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h

@@ -202,6 +202,8 @@ void write_boundary_block(struct block_device *bdev,
 			sector_t bblock, unsigned blocksize);
 int bh_uptodate_or_lock(struct buffer_head *bh);
 int bh_submit_read(struct buffer_head *bh);
+loff_t page_cache_seek_hole_data(struct inode *inode, loff_t offset,
+				 loff_t length, int whence);
 
 extern int buffer_heads_over_limit;
 

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 687b557..5d5554c 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h

@@ -3,6 +3,7 @@
 
 #include <asm/bug.h>
 #include <linux/compiler.h>
+#include <linux/build_bug.h>
 
 enum bug_trap_type {
 	BUG_TRAP_TYPE_NONE = 0,
@@ -13,80 +14,9 @@ enum bug_trap_type {
 struct pt_regs;
 
 #ifdef __CHECKER__
-#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
-#define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
-#define BUILD_BUG_ON_ZERO(e) (0)
-#define BUILD_BUG_ON_NULL(e) ((void*)0)
-#define BUILD_BUG_ON_INVALID(e) (0)
-#define BUILD_BUG_ON_MSG(cond, msg) (0)
-#define BUILD_BUG_ON(condition) (0)
-#define BUILD_BUG() (0)
 #define MAYBE_BUILD_BUG_ON(cond) (0)
 #else /* __CHECKER__ */
 
-/* Force a compilation error if a constant expression is not a power of 2 */
-#define __BUILD_BUG_ON_NOT_POWER_OF_2(n)	\
-	BUILD_BUG_ON(((n) & ((n) - 1)) != 0)
-#define BUILD_BUG_ON_NOT_POWER_OF_2(n)			\
-	BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
-
-/* Force a compilation error if condition is true, but also produce a
-   result (of value 0 and type size_t), so the expression can be used
-   e.g. in a structure initializer (or where-ever else comma expressions
-   aren't permitted). */
-#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
-#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
-
-/*
- * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the
- * expression but avoids the generation of any code, even if that expression
- * has side-effects.
- */
-#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e))))
-
-/**
- * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied
- *		      error message.
- * @condition: the condition which the compiler should know is false.
- *
- * See BUILD_BUG_ON for description.
- */
-#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
-
-/**
- * BUILD_BUG_ON - break compile if a condition is true.
- * @condition: the condition which the compiler should know is false.
- *
- * If you have some code which relies on certain constants being equal, or
- * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to
- * detect if someone changes it.
- *
- * The implementation uses gcc's reluctance to create a negative array, but gcc
- * (as of 4.4) only emits that error for obvious cases (e.g. not arguments to
- * inline functions).  Luckily, in 4.3 they added the "error" function
- * attribute just for this type of case.  Thus, we use a negative sized array
- * (should always create an error on gcc versions older than 4.4) and then call
- * an undefined function with the error attribute (should always create an
- * error on gcc 4.3 and later).  If for some reason, neither creates a
- * compile-time error, we'll still have a link-time error, which is harder to
- * track down.
- */
-#ifndef __OPTIMIZE__
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
-#else
-#define BUILD_BUG_ON(condition) \
-	BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
-#endif
-
-/**
- * BUILD_BUG - break compile if used.
- *
- * If you have some code that you expect the compiler to eliminate at
- * build time, you should use BUILD_BUG to detect if it is
- * unexpectedly used.
- */
-#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
-
 #define MAYBE_BUILD_BUG_ON(cond)			\
 	do {						\
 		if (__builtin_constant_p((cond)))       \

diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h
new file mode 100644
index 0000000..b7d22d6
--- /dev/null
+++ b/include/linux/build_bug.h

@@ -0,0 +1,84 @@
+#ifndef _LINUX_BUILD_BUG_H
+#define _LINUX_BUILD_BUG_H
+
+#include <linux/compiler.h>
+
+#ifdef __CHECKER__
+#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
+#define BUILD_BUG_ON_ZERO(e) (0)
+#define BUILD_BUG_ON_NULL(e) ((void *)0)
+#define BUILD_BUG_ON_INVALID(e) (0)
+#define BUILD_BUG_ON_MSG(cond, msg) (0)
+#define BUILD_BUG_ON(condition) (0)
+#define BUILD_BUG() (0)
+#else /* __CHECKER__ */
+
+/* Force a compilation error if a constant expression is not a power of 2 */
+#define __BUILD_BUG_ON_NOT_POWER_OF_2(n)	\
+	BUILD_BUG_ON(((n) & ((n) - 1)) != 0)
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n)			\
+	BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
+
+/*
+ * Force a compilation error if condition is true, but also produce a
+ * result (of value 0 and type size_t), so the expression can be used
+ * e.g. in a structure initializer (or where-ever else comma expressions
+ * aren't permitted).
+ */
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); }))
+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:(-!!(e)); }))
+
+/*
+ * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the
+ * expression but avoids the generation of any code, even if that expression
+ * has side-effects.
+ */
+#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e))))
+
+/**
+ * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied
+ *		      error message.
+ * @cond: the condition which the compiler should know is false.
+ *
+ * See BUILD_BUG_ON for description.
+ */
+#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
+
+/**
+ * BUILD_BUG_ON - break compile if a condition is true.
+ * @condition: the condition which the compiler should know is false.
+ *
+ * If you have some code which relies on certain constants being equal, or
+ * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to
+ * detect if someone changes it.
+ *
+ * The implementation uses gcc's reluctance to create a negative array, but gcc
+ * (as of 4.4) only emits that error for obvious cases (e.g. not arguments to
+ * inline functions).  Luckily, in 4.3 they added the "error" function
+ * attribute just for this type of case.  Thus, we use a negative sized array
+ * (should always create an error on gcc versions older than 4.4) and then call
+ * an undefined function with the error attribute (should always create an
+ * error on gcc 4.3 and later).  If for some reason, neither creates a
+ * compile-time error, we'll still have a link-time error, which is harder to
+ * track down.
+ */
+#ifndef __OPTIMIZE__
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#else
+#define BUILD_BUG_ON(condition) \
+	BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
+#endif
+
+/**
+ * BUILD_BUG - break compile if used.
+ *
+ * If you have some code that you expect the compiler to eliminate at
+ * build time, you should use BUILD_BUG to detect if it is
+ * unexpectedly used.
+ */
+#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
+
+#endif	/* __CHECKER__ */
+
+#endif	/* _LINUX_BUILD_BUG_H */

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 89b65b82..ec8a4d7 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h

@@ -22,6 +22,7 @@
 
 #include <linux/kernel.h>
 #include <linux/bug.h>
+#include <linux/errno.h>
 
 /*
  * was unsigned short, but we might as well be ready for > 64kB I/O pages
@@ -39,6 +40,8 @@ struct bvec_iter {
 
 	unsigned int		bi_idx;		/* current index into bvl_vec */
 
+	unsigned int            bi_done;	/* number of bytes completed */
+
 	unsigned int            bi_bvec_done;	/* number of bytes completed in
 						   current bvec */
 };
@@ -66,12 +69,14 @@ struct bvec_iter {
 	.bv_offset	= bvec_iter_offset((bvec), (iter)),	\
 })
 
-static inline void bvec_iter_advance(const struct bio_vec *bv,
-				     struct bvec_iter *iter,
-				     unsigned bytes)
+static inline bool bvec_iter_advance(const struct bio_vec *bv,
+		struct bvec_iter *iter, unsigned bytes)
 {
-	WARN_ONCE(bytes > iter->bi_size,
-		  "Attempted to advance past end of bvec iter\n");
+	if (WARN_ONCE(bytes > iter->bi_size,
+		     "Attempted to advance past end of bvec iter\n")) {
+		iter->bi_size = 0;
+		return false;
+	}
 
 	while (bytes) {
 		unsigned iter_len = bvec_iter_len(bv, *iter);
@@ -80,12 +85,38 @@ static inline void bvec_iter_advance(const struct bio_vec *bv,
 		bytes -= len;
 		iter->bi_size -= len;
 		iter->bi_bvec_done += len;
+		iter->bi_done += len;
 
 		if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
 			iter->bi_bvec_done = 0;
 			iter->bi_idx++;
 		}
 	}
+	return true;
+}
+
+static inline bool bvec_iter_rewind(const struct bio_vec *bv,
+				     struct bvec_iter *iter,
+				     unsigned int bytes)
+{
+	while (bytes) {
+		unsigned len = min(bytes, iter->bi_bvec_done);
+
+		if (iter->bi_bvec_done == 0) {
+			if (WARN_ONCE(iter->bi_idx == 0,
+				      "Attempted to rewind iter beyond "
+				      "bvec's boundaries\n")) {
+				return false;
+			}
+			iter->bi_idx--;
+			iter->bi_bvec_done = __bvec_iter_bvec(bv, *iter)->bv_len;
+			continue;
+		}
+		bytes -= len;
+		iter->bi_size += len;
+		iter->bi_bvec_done -= len;
+	}
+	return true;
 }
 
 #define for_each_bvec(bvl, bio_vec, iter, start)			\

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index fd8b295..f0f6c53 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h

@@ -2,103 +2,174 @@
 #define __CEPH_FEATURES
 
 /*
- * feature bits
+ * Each time we reclaim bits for reuse we need to specify another bit
+ * that, if present, indicates we have the new incarnation of that
+ * feature.  Base case is 1 (first use).
  */
-#define CEPH_FEATURE_UID            (1ULL<<0)
-#define CEPH_FEATURE_NOSRCADDR      (1ULL<<1)
-#define CEPH_FEATURE_MONCLOCKCHECK  (1ULL<<2)
-#define CEPH_FEATURE_FLOCK          (1ULL<<3)
-#define CEPH_FEATURE_SUBSCRIBE2     (1ULL<<4)
-#define CEPH_FEATURE_MONNAMES       (1ULL<<5)
-#define CEPH_FEATURE_RECONNECT_SEQ  (1ULL<<6)
-#define CEPH_FEATURE_DIRLAYOUTHASH  (1ULL<<7)
-#define CEPH_FEATURE_OBJECTLOCATOR  (1ULL<<8)
-#define CEPH_FEATURE_PGID64         (1ULL<<9)
-#define CEPH_FEATURE_INCSUBOSDMAP   (1ULL<<10)
-#define CEPH_FEATURE_PGPOOL3        (1ULL<<11)
-#define CEPH_FEATURE_OSDREPLYMUX    (1ULL<<12)
-#define CEPH_FEATURE_OSDENC         (1ULL<<13)
-#define CEPH_FEATURE_OMAP           (1ULL<<14)
-#define CEPH_FEATURE_MONENC         (1ULL<<15)
-#define CEPH_FEATURE_QUERY_T        (1ULL<<16)
-#define CEPH_FEATURE_INDEP_PG_MAP   (1ULL<<17)
-#define CEPH_FEATURE_CRUSH_TUNABLES (1ULL<<18)
-#define CEPH_FEATURE_CHUNKY_SCRUB   (1ULL<<19)
-#define CEPH_FEATURE_MON_NULLROUTE  (1ULL<<20)
-#define CEPH_FEATURE_MON_GV         (1ULL<<21)
-#define CEPH_FEATURE_BACKFILL_RESERVATION (1ULL<<22)
-#define CEPH_FEATURE_MSG_AUTH	    (1ULL<<23)
-#define CEPH_FEATURE_RECOVERY_RESERVATION (1ULL<<24)
-#define CEPH_FEATURE_CRUSH_TUNABLES2 (1ULL<<25)
-#define CEPH_FEATURE_CREATEPOOLID   (1ULL<<26)
-#define CEPH_FEATURE_REPLY_CREATE_INODE   (1ULL<<27)
-#define CEPH_FEATURE_OSD_HBMSGS     (1ULL<<28)
-#define CEPH_FEATURE_MDSENC         (1ULL<<29)
-#define CEPH_FEATURE_OSDHASHPSPOOL  (1ULL<<30)
-#define CEPH_FEATURE_MON_SINGLE_PAXOS (1ULL<<31)
-#define CEPH_FEATURE_OSD_SNAPMAPPER (1ULL<<32)
-#define CEPH_FEATURE_MON_SCRUB      (1ULL<<33)
-#define CEPH_FEATURE_OSD_PACKED_RECOVERY (1ULL<<34)
-#define CEPH_FEATURE_OSD_CACHEPOOL (1ULL<<35)
-#define CEPH_FEATURE_CRUSH_V2      (1ULL<<36)  /* new indep; SET_* steps */
-#define CEPH_FEATURE_EXPORT_PEER   (1ULL<<37)
-#define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38)
-#define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38)   /* overlap with EC */
-/* The process supports new-style OSDMap encoding. Monitors also use
-   this bit to determine if peers support NAK messages. */
-#define CEPH_FEATURE_OSDMAP_ENC    (1ULL<<39)
-#define CEPH_FEATURE_MDS_INLINE_DATA     (1ULL<<40)
-#define CEPH_FEATURE_CRUSH_TUNABLES3     (1ULL<<41)
-#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41)  /* overlap w/ tunables3 */
-#define CEPH_FEATURE_MSGR_KEEPALIVE2   (1ULL<<42)
-#define CEPH_FEATURE_OSD_POOLRESEND    (1ULL<<43)
-#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44)
-#define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45)
-#define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46)
-#define CEPH_FEATURE_OSD_REPOP         (1ULL<<46)   /* overlap with fadvise */
-#define CEPH_FEATURE_OSD_OBJECT_DIGEST  (1ULL<<46)  /* overlap with fadvise */
-#define CEPH_FEATURE_OSD_TRANSACTION_MAY_LAYOUT (1ULL<<46) /* overlap w/ fadvise */
-#define CEPH_FEATURE_MDS_QUOTA      (1ULL<<47)
-#define CEPH_FEATURE_CRUSH_V4      (1ULL<<48)  /* straw2 buckets */
-#define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49)
-// duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
-#define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49)  /* overlap w/ above */
-#define CEPH_FEATURE_MON_METADATA (1ULL<<50)
-#define CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT (1ULL<<51) /* can sort objs bitwise */
-#define CEPH_FEATURE_OSD_PROXY_WRITE_FEATURES (1ULL<<52)
-#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V3 (1ULL<<53)
-#define CEPH_FEATURE_OSD_HITSET_GMT (1ULL<<54)
-#define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55)
-#define CEPH_FEATURE_NEW_OSDOP_ENCODING   (1ULL<<56) /* New, v7 encoding */
-#define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */
-#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */
-#define CEPH_FEATURE_CRUSH_TUNABLES5	(1ULL<<58) /* chooseleaf stable mode */
-// duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5
-#define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING   (1ULL<<58) /* New, v7 encoding */
-#define CEPH_FEATURE_FS_FILE_LAYOUT_V2       (1ULL<<58) /* file_layout_t */
+#define CEPH_FEATURE_INCARNATION_1 (0ull)
+#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL
+
+#define DEFINE_CEPH_FEATURE(bit, incarnation, name)			\
+	const static uint64_t CEPH_FEATURE_##name = (1ULL<<bit);		\
+	const static uint64_t CEPH_FEATUREMASK_##name =			\
+		(1ULL<<bit | CEPH_FEATURE_INCARNATION_##incarnation);
+
+/* this bit is ignored but still advertised by release *when* */
+#define DEFINE_CEPH_FEATURE_DEPRECATED(bit, incarnation, name, when) \
+	const static uint64_t DEPRECATED_CEPH_FEATURE_##name = (1ULL<<bit); \
+	const static uint64_t DEPRECATED_CEPH_FEATUREMASK_##name =		\
+		(1ULL<<bit | CEPH_FEATURE_INCARNATION_##incarnation);
 
 /*
- * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
- * vector to evaluate to 64 bit ~0.  To cope, we designate 1ULL << 63
- * to mean 33 bit ~0, and introduce a helper below to do the
- * translation.
- *
- * This was introduced by ceph.git commit
- *   9ea02b84104045c2ffd7e7f4e7af512953855ecd v0.58-657-g9ea02b8
- * and fixed by ceph.git commit
- *   4255b5c2fb54ae40c53284b3ab700fdfc7e61748 v0.65-263-g4255b5c
+ * this bit is ignored by release *unused* and not advertised by
+ * release *unadvertised*
  */
-#define CEPH_FEATURE_RESERVED (1ULL<<63)
+#define DEFINE_CEPH_FEATURE_RETIRED(bit, inc, name, unused, unadvertised)
 
-static inline u64 ceph_sanitize_features(u64 features)
-{
-	if (features & CEPH_FEATURE_RESERVED) {
-		/* everything through OSD_SNAPMAPPER */
-		return 0x1ffffffffull;
-	} else {
-		return features;
-	}
-}
+
+/*
+ * test for a feature.  this test is safer than a typical mask against
+ * the bit because it ensures that we have the bit AND the marker for the
+ * bit's incarnation.  this must be used in any case where the features
+ * bits may include an old meaning of the bit.
+ */
+#define CEPH_HAVE_FEATURE(x, name)			\
+	(((x) & (CEPH_FEATUREMASK_##name)) == (CEPH_FEATUREMASK_##name))
+
+
+/*
+ * Notes on deprecation:
+ *
+ * A *major* release is a release through which all upgrades must pass
+ * (e.g., jewel).  For example, no pre-jewel server will ever talk to
+ * a post-jewel server (mon, osd, etc).
+ *
+ * For feature bits used *only* on the server-side:
+ *
+ *  - In the first phase we indicate that a feature is DEPRECATED as of
+ *    a particular release.  This is the first major release X (say,
+ *    jewel) that does not depend on its peers advertising the feature.
+ *    That is, it safely assumes its peers all have the feature.  We
+ *    indicate this with the DEPRECATED macro.  For example,
+ *
+ *      DEFINE_CEPH_FEATURE_DEPRECATED( 2, 1, MONCLOCKCHECK, JEWEL)
+ *
+ *    because 10.2.z (jewel) did not care if its peers advertised this
+ *    feature bit.
+ *
+ *  - In the second phase we stop advertising the bit and call it
+ *    RETIRED.  This can normally be done in the *next* major release
+ *    following the one in which we marked the feature DEPRECATED.  In
+ *    the above example, for 12.0.z (luminous) we can say:
+ *
+ *      DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS)
+ *
+ *  - The bit can be reused in the first post-luminous release, 13.0.z
+ *    (m).
+ *
+ * This ensures that no two versions that have different meanings for
+ * the bit ever speak to each other.
+ */
+
+DEFINE_CEPH_FEATURE( 0, 1, UID)
+DEFINE_CEPH_FEATURE( 1, 1, NOSRCADDR)
+DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE( 3, 1, FLOCK)
+DEFINE_CEPH_FEATURE( 4, 1, SUBSCRIBE2)
+DEFINE_CEPH_FEATURE( 5, 1, MONNAMES)
+DEFINE_CEPH_FEATURE( 6, 1, RECONNECT_SEQ)
+DEFINE_CEPH_FEATURE( 7, 1, DIRLAYOUTHASH)
+DEFINE_CEPH_FEATURE( 8, 1, OBJECTLOCATOR)
+DEFINE_CEPH_FEATURE( 9, 1, PGID64)
+DEFINE_CEPH_FEATURE(10, 1, INCSUBOSDMAP)
+DEFINE_CEPH_FEATURE(11, 1, PGPOOL3)
+DEFINE_CEPH_FEATURE(12, 1, OSDREPLYMUX)
+DEFINE_CEPH_FEATURE(13, 1, OSDENC)
+DEFINE_CEPH_FEATURE_RETIRED(14, 1, OMAP, HAMMER, JEWEL)
+DEFINE_CEPH_FEATURE(14, 2, SERVER_KRAKEN)
+DEFINE_CEPH_FEATURE(15, 1, MONENC)
+DEFINE_CEPH_FEATURE_RETIRED(16, 1, QUERY_T, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(17, 1, INDEP_PG_MAP, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE(18, 1, CRUSH_TUNABLES)
+DEFINE_CEPH_FEATURE_RETIRED(19, 1, CHUNKY_SCRUB, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(20, 1, MON_NULLROUTE, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(21, 1, MON_GV, HAMMER, JEWEL)
+DEFINE_CEPH_FEATURE(21, 2, SERVER_LUMINOUS)
+DEFINE_CEPH_FEATURE(21, 2, RESEND_ON_SPLIT)  // overlap
+DEFINE_CEPH_FEATURE(21, 2, RADOS_BACKOFF)    // overlap
+DEFINE_CEPH_FEATURE(21, 2, OSDMAP_PG_UPMAP)  // overlap
+DEFINE_CEPH_FEATURE(21, 2, CRUSH_CHOOSE_ARGS) // overlap
+DEFINE_CEPH_FEATURE_RETIRED(22, 1, BACKFILL_RESERVATION, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE(23, 1, MSG_AUTH)
+DEFINE_CEPH_FEATURE_RETIRED(24, 1, RECOVERY_RESERVATION, JEWEL, LUNINOUS)
+
+DEFINE_CEPH_FEATURE(25, 1, CRUSH_TUNABLES2)
+DEFINE_CEPH_FEATURE(26, 1, CREATEPOOLID)
+DEFINE_CEPH_FEATURE(27, 1, REPLY_CREATE_INODE)
+DEFINE_CEPH_FEATURE_RETIRED(28, 1, OSD_HBMSGS, HAMMER, JEWEL)
+DEFINE_CEPH_FEATURE(28, 2, SERVER_M)
+DEFINE_CEPH_FEATURE(29, 1, MDSENC)
+DEFINE_CEPH_FEATURE(30, 1, OSDHASHPSPOOL)
+DEFINE_CEPH_FEATURE(31, 1, MON_SINGLE_PAXOS)  // deprecate me
+DEFINE_CEPH_FEATURE_RETIRED(32, 1, OSD_SNAPMAPPER, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(33, 1, MON_SCRUB, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE_RETIRED(34, 1, OSD_PACKED_RECOVERY, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE(35, 1, OSD_CACHEPOOL)
+DEFINE_CEPH_FEATURE(36, 1, CRUSH_V2)
+DEFINE_CEPH_FEATURE(37, 1, EXPORT_PEER)
+DEFINE_CEPH_FEATURE(38, 1, OSD_ERASURE_CODES)
+DEFINE_CEPH_FEATURE(38, 1, OSD_OSD_TMAP2OMAP) // overlap
+DEFINE_CEPH_FEATURE(39, 1, OSDMAP_ENC)
+DEFINE_CEPH_FEATURE(40, 1, MDS_INLINE_DATA)
+DEFINE_CEPH_FEATURE(41, 1, CRUSH_TUNABLES3)
+DEFINE_CEPH_FEATURE(41, 1, OSD_PRIMARY_AFFINITY) // overlap
+DEFINE_CEPH_FEATURE(42, 1, MSGR_KEEPALIVE2)
+DEFINE_CEPH_FEATURE(43, 1, OSD_POOLRESEND)
+DEFINE_CEPH_FEATURE(44, 1, ERASURE_CODE_PLUGINS_V2)
+DEFINE_CEPH_FEATURE_RETIRED(45, 1, OSD_SET_ALLOC_HINT, JEWEL, LUMINOUS)
+
+DEFINE_CEPH_FEATURE(46, 1, OSD_FADVISE_FLAGS)
+DEFINE_CEPH_FEATURE_RETIRED(46, 1, OSD_REPOP, JEWEL, LUMINOUS) // overlap
+DEFINE_CEPH_FEATURE_RETIRED(46, 1, OSD_OBJECT_DIGEST, JEWEL, LUMINOUS) // overlap
+DEFINE_CEPH_FEATURE_RETIRED(46, 1, OSD_TRANSACTION_MAY_LAYOUT, JEWEL, LUMINOUS) // overlap
+
+DEFINE_CEPH_FEATURE(47, 1, MDS_QUOTA)
+DEFINE_CEPH_FEATURE(48, 1, CRUSH_V4)
+DEFINE_CEPH_FEATURE_RETIRED(49, 1, OSD_MIN_SIZE_RECOVERY, JEWEL, LUMINOUS)
+DEFINE_CEPH_FEATURE_RETIRED(49, 1, OSD_PROXY_FEATURES, JEWEL, LUMINOUS) // overlap
+
+DEFINE_CEPH_FEATURE(50, 1, MON_METADATA)
+DEFINE_CEPH_FEATURE(51, 1, OSD_BITWISE_HOBJ_SORT)
+DEFINE_CEPH_FEATURE(52, 1, OSD_PROXY_WRITE_FEATURES)
+DEFINE_CEPH_FEATURE(53, 1, ERASURE_CODE_PLUGINS_V3)
+DEFINE_CEPH_FEATURE(54, 1, OSD_HITSET_GMT)
+DEFINE_CEPH_FEATURE(55, 1, HAMMER_0_94_4)
+DEFINE_CEPH_FEATURE(56, 1, NEW_OSDOP_ENCODING)
+DEFINE_CEPH_FEATURE(57, 1, MON_STATEFUL_SUB)
+DEFINE_CEPH_FEATURE(57, 1, MON_ROUTE_OSDMAP) // overlap
+DEFINE_CEPH_FEATURE(57, 1, OSDSUBOP_NO_SNAPCONTEXT) // overlap
+DEFINE_CEPH_FEATURE(57, 1, SERVER_JEWEL) // overlap
+DEFINE_CEPH_FEATURE(58, 1, CRUSH_TUNABLES5)
+DEFINE_CEPH_FEATURE(58, 1, NEW_OSDOPREPLY_ENCODING) // overlap
+DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap
+DEFINE_CEPH_FEATURE(59, 1, FS_BTIME)
+DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap
+DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap
+DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING)  // *do not share this bit*
+
+DEFINE_CEPH_FEATURE(61, 1, RESERVED2)          // unused, but slow down!
+DEFINE_CEPH_FEATURE(62, 1, RESERVED)           // do not use; used as a sentinel
+DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing
+
 
 /*
  * Features supported.
@@ -113,6 +184,11 @@ static inline u64 ceph_sanitize_features(u64 features)
 	 CEPH_FEATURE_PGPOOL3 |			\
 	 CEPH_FEATURE_OSDENC |			\
 	 CEPH_FEATURE_CRUSH_TUNABLES |		\
+	 CEPH_FEATURE_SERVER_LUMINOUS |		\
+	 CEPH_FEATURE_RESEND_ON_SPLIT |		\
+	 CEPH_FEATURE_RADOS_BACKOFF |		\
+	 CEPH_FEATURE_OSDMAP_PG_UPMAP |		\
+	 CEPH_FEATURE_CRUSH_CHOOSE_ARGS |	\
 	 CEPH_FEATURE_MSG_AUTH |		\
 	 CEPH_FEATURE_CRUSH_TUNABLES2 |		\
 	 CEPH_FEATURE_REPLY_CREATE_INODE |	\
@@ -126,7 +202,11 @@ static inline u64 ceph_sanitize_features(u64 features)
 	 CEPH_FEATURE_CRUSH_TUNABLES3 |		\
 	 CEPH_FEATURE_OSD_PRIMARY_AFFINITY |	\
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
+	 CEPH_FEATURE_OSD_POOLRESEND |		\
 	 CEPH_FEATURE_CRUSH_V4 |		\
+	 CEPH_FEATURE_NEW_OSDOP_ENCODING |	\
+	 CEPH_FEATURE_SERVER_JEWEL |		\
+	 CEPH_FEATURE_MON_STATEFUL_SUB |	\
 	 CEPH_FEATURE_CRUSH_TUNABLES5 |		\
 	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
 

diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index ad078eb..edf5b04 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h

@@ -147,6 +147,7 @@ struct ceph_dir_layout {
 #define CEPH_MSG_OSD_OP                 42
 #define CEPH_MSG_OSD_OPREPLY            43
 #define CEPH_MSG_WATCH_NOTIFY           44
+#define CEPH_MSG_OSD_BACKOFF            61
 
 
 /* watch-notify operations */

diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index f990f2c..14af9b7 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h

@@ -133,6 +133,66 @@ static inline char *ceph_extract_encoded_string(void **p, void *end,
 }
 
 /*
+ * skip helpers
+ */
+#define ceph_decode_skip_n(p, end, n, bad)			\
+	do {							\
+		ceph_decode_need(p, end, n, bad);		\
+                *p += n;					\
+	} while (0)
+
+#define ceph_decode_skip_64(p, end, bad)			\
+ceph_decode_skip_n(p, end, sizeof(u64), bad)
+
+#define ceph_decode_skip_32(p, end, bad)			\
+ceph_decode_skip_n(p, end, sizeof(u32), bad)
+
+#define ceph_decode_skip_16(p, end, bad)			\
+ceph_decode_skip_n(p, end, sizeof(u16), bad)
+
+#define ceph_decode_skip_8(p, end, bad)				\
+ceph_decode_skip_n(p, end, sizeof(u8), bad)
+
+#define ceph_decode_skip_string(p, end, bad)			\
+	do {							\
+		u32 len;					\
+								\
+		ceph_decode_32_safe(p, end, len, bad);		\
+		ceph_decode_skip_n(p, end, len, bad);		\
+	} while (0)
+
+#define ceph_decode_skip_set(p, end, type, bad)			\
+	do {							\
+		u32 len;					\
+								\
+		ceph_decode_32_safe(p, end, len, bad);		\
+		while (len--)					\
+			ceph_decode_skip_##type(p, end, bad);	\
+	} while (0)
+
+#define ceph_decode_skip_map(p, end, ktype, vtype, bad)		\
+	do {							\
+		u32 len;					\
+								\
+		ceph_decode_32_safe(p, end, len, bad);		\
+		while (len--) {					\
+			ceph_decode_skip_##ktype(p, end, bad);	\
+			ceph_decode_skip_##vtype(p, end, bad);	\
+		}						\
+	} while (0)
+
+#define ceph_decode_skip_map_of_map(p, end, ktype1, ktype2, vtype2, bad) \
+	do {							\
+		u32 len;					\
+								\
+		ceph_decode_32_safe(p, end, len, bad);		\
+		while (len--) {					\
+			ceph_decode_skip_##ktype1(p, end, bad);	\
+			ceph_decode_skip_map(p, end, ktype2, vtype2, bad); \
+		}						\
+	} while (0)
+
+/*
  * struct ceph_timespec <-> struct timespec
  */
 static inline void ceph_decode_timespec(struct timespec *ts,

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 3229ae6..8a79587 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h

@@ -184,10 +184,11 @@ static inline int calc_pages_for(u64 off, u64 len)
 		(off >> PAGE_SHIFT);
 }
 
-/*
- * These are not meant to be generic - an integer key is assumed.
- */
-#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)		\
+#define RB_BYVAL(a)      (a)
+#define RB_BYPTR(a)      (&(a))
+#define RB_CMP3WAY(a, b) ((a) < (b) ? -1 : (a) > (b))
+
+#define DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld) \
 static void insert_##name(struct rb_root *root, type *t)		\
 {									\
 	struct rb_node **n = &root->rb_node;				\
@@ -197,11 +198,13 @@ static void insert_##name(struct rb_root *root, type *t)		\
 									\
 	while (*n) {							\
 		type *cur = rb_entry(*n, type, nodefld);		\
+		int cmp;						\
 									\
 		parent = *n;						\
-		if (t->keyfld < cur->keyfld)				\
+		cmp = cmpexp(keyexp(t->keyfld), keyexp(cur->keyfld));	\
+		if (cmp < 0)						\
 			n = &(*n)->rb_left;				\
-		else if (t->keyfld > cur->keyfld)			\
+		else if (cmp > 0)					\
 			n = &(*n)->rb_right;				\
 		else							\
 			BUG();						\
@@ -217,19 +220,24 @@ static void erase_##name(struct rb_root *root, type *t)			\
 	RB_CLEAR_NODE(&t->nodefld);					\
 }
 
-#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)		\
-extern type __lookup_##name##_key;					\
-static type *lookup_##name(struct rb_root *root,			\
-			   typeof(__lookup_##name##_key.keyfld) key)	\
+/*
+ * @lookup_param_type is a parameter and not constructed from (@type,
+ * @keyfld) with typeof() because adding const is too unwieldy.
+ */
+#define DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, cmpexp, keyexp,	\
+			       lookup_param_type, nodefld)		\
+static type *lookup_##name(struct rb_root *root, lookup_param_type key)	\
 {									\
 	struct rb_node *n = root->rb_node;				\
 									\
 	while (n) {							\
 		type *cur = rb_entry(n, type, nodefld);			\
+		int cmp;						\
 									\
-		if (key < cur->keyfld)					\
+		cmp = cmpexp(key, keyexp(cur->keyfld));			\
+		if (cmp < 0)						\
 			n = n->rb_left;					\
-		else if (key > cur->keyfld)				\
+		else if (cmp > 0)					\
 			n = n->rb_right;				\
 		else							\
 			return cur;					\
@@ -238,6 +246,23 @@ static type *lookup_##name(struct rb_root *root,			\
 	return NULL;							\
 }
 
+#define DEFINE_RB_FUNCS2(name, type, keyfld, cmpexp, keyexp,		\
+			 lookup_param_type, nodefld)			\
+DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld)	\
+DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, cmpexp, keyexp,		\
+		       lookup_param_type, nodefld)
+
+/*
+ * Shorthands for integer keys.
+ */
+#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)		\
+DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, RB_CMP3WAY, RB_BYVAL, nodefld)
+
+#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)		\
+extern type __lookup_##name##_key;					\
+DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, RB_CMP3WAY, RB_BYVAL,	\
+		       typeof(__lookup_##name##_key.keyfld), nodefld)
+
 #define DEFINE_RB_FUNCS(name, type, keyfld, nodefld)			\
 DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld)			\
 DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld)

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index c5c4c71..fbd94d9 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h

@@ -44,6 +44,8 @@ struct ceph_connection_operations {
 					struct ceph_msg_header *hdr,
 					int *skip);
 
+	void (*reencode_message) (struct ceph_msg *msg);
+
 	int (*sign_message) (struct ceph_msg *msg);
 	int (*check_message_signature) (struct ceph_msg *msg);
 };

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 85650b4..c6d96a5 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h

@@ -1,6 +1,7 @@
 #ifndef _FS_CEPH_OSD_CLIENT_H
 #define _FS_CEPH_OSD_CLIENT_H
 
+#include <linux/bitrev.h>
 #include <linux/completion.h>
 #include <linux/kref.h>
 #include <linux/mempool.h>
@@ -36,6 +37,8 @@ struct ceph_osd {
 	struct ceph_connection o_con;
 	struct rb_root o_requests;
 	struct rb_root o_linger_requests;
+	struct rb_root o_backoff_mappings;
+	struct rb_root o_backoffs_by_id;
 	struct list_head o_osd_lru;
 	struct ceph_auth_handshake o_auth;
 	unsigned long lru_ttl;
@@ -136,7 +139,8 @@ struct ceph_osd_request_target {
 	struct ceph_object_id target_oid;
 	struct ceph_object_locator target_oloc;
 
-	struct ceph_pg pgid;
+	struct ceph_pg pgid;               /* last raw pg we mapped to */
+	struct ceph_spg spgid;             /* last actual spg we mapped to */
 	u32 pg_num;
 	u32 pg_num_mask;
 	struct ceph_osds acting;
@@ -148,6 +152,9 @@ struct ceph_osd_request_target {
 	unsigned int flags;                /* CEPH_OSD_FLAG_* */
 	bool paused;
 
+	u32 epoch;
+	u32 last_force_resend;
+
 	int osd;
 };
 
@@ -193,7 +200,6 @@ struct ceph_osd_request {
 	unsigned long r_stamp;                /* jiffies, send or check time */
 	unsigned long r_start_stamp;          /* jiffies */
 	int r_attempts;
-	u32 r_last_force_resend;
 	u32 r_map_dne_bound;
 
 	struct ceph_osd_req_op r_ops[];
@@ -203,6 +209,23 @@ struct ceph_request_redirect {
 	struct ceph_object_locator oloc;
 };
 
+/*
+ * osd request identifier
+ *
+ * caller name + incarnation# + tid to uniquely identify this request
+ */
+struct ceph_osd_reqid {
+	struct ceph_entity_name name;
+	__le64 tid;
+	__le32 inc;
+} __packed;
+
+struct ceph_blkin_trace_info {
+	__le64 trace_id;
+	__le64 span_id;
+	__le64 parent_span_id;
+} __packed;
+
 typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
 				 u64 notifier_id, void *data, size_t data_len);
 typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);
@@ -221,7 +244,6 @@ struct ceph_osd_linger_request {
 	struct list_head pending_lworks;
 
 	struct ceph_osd_request_target t;
-	u32 last_force_resend;
 	u32 map_dne_bound;
 
 	struct timespec mtime;
@@ -256,6 +278,48 @@ struct ceph_watch_item {
 	struct ceph_entity_addr addr;
 };
 
+struct ceph_spg_mapping {
+	struct rb_node node;
+	struct ceph_spg spgid;
+
+	struct rb_root backoffs;
+};
+
+struct ceph_hobject_id {
+	void *key;
+	size_t key_len;
+	void *oid;
+	size_t oid_len;
+	u64 snapid;
+	u32 hash;
+	u8 is_max;
+	void *nspace;
+	size_t nspace_len;
+	s64 pool;
+
+	/* cache */
+	u32 hash_reverse_bits;
+};
+
+static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid)
+{
+	hoid->hash_reverse_bits = bitrev32(hoid->hash);
+}
+
+/*
+ * PG-wide backoff: [begin, end)
+ * per-object backoff: begin == end
+ */
+struct ceph_osd_backoff {
+	struct rb_node spg_node;
+	struct rb_node id_node;
+
+	struct ceph_spg spgid;
+	u64 id;
+	struct ceph_hobject_id *begin;
+	struct ceph_hobject_id *end;
+};
+
 #define CEPH_LINGER_ID_START	0xffff000000000000ULL
 
 struct ceph_osd_client {

diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 938656f..a0996cb9 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h

@@ -24,7 +24,15 @@ struct ceph_pg {
 	uint32_t seed;
 };
 
+#define CEPH_SPG_NOSHARD	-1
+
+struct ceph_spg {
+	struct ceph_pg pgid;
+	s8 shard;
+};
+
 int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs);
+int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs);
 
 #define CEPH_POOL_FLAG_HASHPSPOOL	(1ULL << 0) /* hash pg seed and pool id
 						       together */
@@ -135,10 +143,14 @@ struct ceph_pg_mapping {
 		struct {
 			int len;
 			int osds[];
-		} pg_temp;
+		} pg_temp, pg_upmap;
 		struct {
 			int osd;
 		} primary_temp;
+		struct {
+			int len;
+			int from_to[][2];
+		} pg_upmap_items;
 	};
 };
 
@@ -150,13 +162,17 @@ struct ceph_osdmap {
 	u32 flags;         /* CEPH_OSDMAP_* */
 
 	u32 max_osd;       /* size of osd_state, _offload, _addr arrays */
-	u8 *osd_state;     /* CEPH_OSD_* */
+	u32 *osd_state;    /* CEPH_OSD_* */
 	u32 *osd_weight;   /* 0 = failed, 0x10000 = 100% normal */
 	struct ceph_entity_addr *osd_addr;
 
 	struct rb_root pg_temp;
 	struct rb_root primary_temp;
 
+	/* remap (post-CRUSH, pre-up) */
+	struct rb_root pg_upmap;	/* PG := raw set */
+	struct rb_root pg_upmap_items;	/* from -> to within raw set */
+
 	u32 *osd_primary_affinity;
 
 	struct rb_root pg_pools;
@@ -187,7 +203,7 @@ static inline bool ceph_osd_is_down(struct ceph_osdmap *map, int osd)
 	return !ceph_osd_is_up(map, osd);
 }
 
-extern char *ceph_osdmap_state_str(char *str, int len, int state);
+char *ceph_osdmap_state_str(char *str, int len, u32 state);
 extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd);
 
 static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
@@ -198,11 +214,13 @@ static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
 	return &map->osd_addr[osd];
 }
 
+#define CEPH_PGID_ENCODING_LEN		(1 + 8 + 4 + 4)
+
 static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
 {
 	__u8 version;
 
-	if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) {
+	if (!ceph_has_room(p, end, CEPH_PGID_ENCODING_LEN)) {
 		pr_warn("incomplete pg encoding\n");
 		return -EINVAL;
 	}
@@ -240,6 +258,8 @@ static inline void ceph_osds_init(struct ceph_osds *set)
 
 void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src);
 
+bool ceph_pg_is_split(const struct ceph_pg *pgid, u32 old_pg_num,
+		      u32 new_pg_num);
 bool ceph_is_new_interval(const struct ceph_osds *old_acting,
 			  const struct ceph_osds *new_acting,
 			  const struct ceph_osds *old_up,
@@ -262,15 +282,24 @@ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
 					 u64 off, u64 len,
 					 u64 *bno, u64 *oxoff, u64 *oxlen);
 
+int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
+				const struct ceph_object_id *oid,
+				const struct ceph_object_locator *oloc,
+				struct ceph_pg *raw_pgid);
 int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
-			      struct ceph_object_id *oid,
-			      struct ceph_object_locator *oloc,
+			      const struct ceph_object_id *oid,
+			      const struct ceph_object_locator *oloc,
 			      struct ceph_pg *raw_pgid);
 
 void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
+			       struct ceph_pg_pool_info *pi,
 			       const struct ceph_pg *raw_pgid,
 			       struct ceph_osds *up,
 			       struct ceph_osds *acting);
+bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
+			      struct ceph_pg_pool_info *pi,
+			      const struct ceph_pg *raw_pgid,
+			      struct ceph_spg *spgid);
 int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
 			      const struct ceph_pg *raw_pgid);
 

diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 5d00187..385db08 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h

@@ -439,6 +439,12 @@ enum {
 
 const char *ceph_osd_watch_op_name(int o);
 
+enum {
+	CEPH_OSD_BACKOFF_OP_BLOCK = 1,
+	CEPH_OSD_BACKOFF_OP_ACK_BLOCK = 2,
+	CEPH_OSD_BACKOFF_OP_UNBLOCK = 3,
+};
+
 /*
  * an individual object operation.  each may be accompanied by some data
  * payload

diff --git a/include/linux/clk.h b/include/linux/clk.h
index 91bd464..12c96d9 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h

@@ -657,6 +657,28 @@ static inline void clk_disable_unprepare(struct clk *clk)
 	clk_unprepare(clk);
 }
 
+static inline int clk_bulk_prepare_enable(int num_clks,
+					  struct clk_bulk_data *clks)
+{
+	int ret;
+
+	ret = clk_bulk_prepare(num_clks, clks);
+	if (ret)
+		return ret;
+	ret = clk_bulk_enable(num_clks, clks);
+	if (ret)
+		clk_bulk_unprepare(num_clks, clks);
+
+	return ret;
+}
+
+static inline void clk_bulk_disable_unprepare(int num_clks,
+					      struct clk_bulk_data *clks)
+{
+	clk_bulk_disable(num_clks, clks);
+	clk_bulk_unprepare(num_clks, clks);
+}
+
 #if defined(CONFIG_OF) && defined(CONFIG_COMMON_CLK)
 struct clk *of_clk_get(struct device_node *np, int index);
 struct clk *of_clk_get_by_name(struct device_node *np, const char *name);

diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index c156f50..d4292eb 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h

@@ -28,47 +28,49 @@
 #include <linux/thermal.h>
 #include <linux/cpumask.h>
 
+struct cpufreq_policy;
+
 typedef int (*get_static_t)(cpumask_t *cpumask, int interval,
 			    unsigned long voltage, u32 *power);
 
 #ifdef CONFIG_CPU_THERMAL
 /**
  * cpufreq_cooling_register - function to create cpufreq cooling device.
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen
+ * @policy: cpufreq policy.
  */
 struct thermal_cooling_device *
-cpufreq_cooling_register(const struct cpumask *clip_cpus);
+cpufreq_cooling_register(struct cpufreq_policy *policy);
 
 struct thermal_cooling_device *
-cpufreq_power_cooling_register(const struct cpumask *clip_cpus,
+cpufreq_power_cooling_register(struct cpufreq_policy *policy,
 			       u32 capacitance, get_static_t plat_static_func);
 
 /**
  * of_cpufreq_cooling_register - create cpufreq cooling device based on DT.
  * @np: a valid struct device_node to the cooling device device tree node.
- * @clip_cpus: cpumask of cpus where the frequency constraints will happen
+ * @policy: cpufreq policy.
  */
 #ifdef CONFIG_THERMAL_OF
 struct thermal_cooling_device *
 of_cpufreq_cooling_register(struct device_node *np,
-			    const struct cpumask *clip_cpus);
+			    struct cpufreq_policy *policy);
 
 struct thermal_cooling_device *
 of_cpufreq_power_cooling_register(struct device_node *np,
-				  const struct cpumask *clip_cpus,
+				  struct cpufreq_policy *policy,
 				  u32 capacitance,
 				  get_static_t plat_static_func);
 #else
 static inline struct thermal_cooling_device *
 of_cpufreq_cooling_register(struct device_node *np,
-			    const struct cpumask *clip_cpus)
+			    struct cpufreq_policy *policy)
 {
 	return ERR_PTR(-ENOSYS);
 }
 
 static inline struct thermal_cooling_device *
 of_cpufreq_power_cooling_register(struct device_node *np,
-				  const struct cpumask *clip_cpus,
+				  struct cpufreq_policy *policy,
 				  u32 capacitance,
 				  get_static_t plat_static_func)
 {
@@ -82,15 +84,14 @@ of_cpufreq_power_cooling_register(struct device_node *np,
  */
 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev);
 
-unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq);
 #else /* !CONFIG_CPU_THERMAL */
 static inline struct thermal_cooling_device *
-cpufreq_cooling_register(const struct cpumask *clip_cpus)
+cpufreq_cooling_register(struct cpufreq_policy *policy)
 {
 	return ERR_PTR(-ENOSYS);
 }
 static inline struct thermal_cooling_device *
-cpufreq_power_cooling_register(const struct cpumask *clip_cpus,
+cpufreq_power_cooling_register(struct cpufreq_policy *policy,
 			       u32 capacitance, get_static_t plat_static_func)
 {
 	return NULL;
@@ -98,14 +99,14 @@ cpufreq_power_cooling_register(const struct cpumask *clip_cpus,
 
 static inline struct thermal_cooling_device *
 of_cpufreq_cooling_register(struct device_node *np,
-			    const struct cpumask *clip_cpus)
+			    struct cpufreq_policy *policy)
 {
 	return ERR_PTR(-ENOSYS);
 }
 
 static inline struct thermal_cooling_device *
 of_cpufreq_power_cooling_register(struct device_node *np,
-				  const struct cpumask *clip_cpus,
+				  struct cpufreq_policy *policy,
 				  u32 capacitance,
 				  get_static_t plat_static_func)
 {
@@ -117,11 +118,6 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
 {
 	return;
 }
-static inline
-unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
-{
-	return THERMAL_CSTATE_INVALID;
-}
 #endif	/* CONFIG_CPU_THERMAL */
 
 #endif /* __CPU_COOLING_H__ */

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 905117b..f10a9b3 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h

@@ -862,6 +862,20 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 		return -EINVAL;
 	}
 }
+
+static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy *policy)
+{
+	struct cpufreq_frequency_table *pos;
+	int count = 0;
+
+	if (unlikely(!policy->freq_table))
+		return 0;
+
+	cpufreq_for_each_valid_entry(pos, policy->freq_table)
+		count++;
+
+	return count;
+}
 #else
 static inline int cpufreq_boost_trigger_state(int state)
 {

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 4090a42..2df2118 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h

@@ -19,7 +19,7 @@
 				     CRASH_CORE_NOTE_NAME_BYTES +	\
 				     CRASH_CORE_NOTE_DESC_BYTES)
 
-#define VMCOREINFO_BYTES	   (4096)
+#define VMCOREINFO_BYTES	   PAGE_SIZE
 #define VMCOREINFO_NOTE_NAME	   "VMCOREINFO"
 #define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
 #define VMCOREINFO_NOTE_SIZE	   ((CRASH_CORE_NOTE_HEAD_BYTES * 2) +	\
@@ -28,6 +28,7 @@
 
 typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4];
 
+void crash_update_vmcoreinfo_safecopy(void *ptr);
 void crash_save_vmcoreinfo(void);
 void arch_crash_save_vmcoreinfo(void);
 __printf(1, 2)
@@ -56,9 +57,7 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 #define VMCOREINFO_CONFIG(name) \
 	vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
 
-extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
-extern size_t vmcoreinfo_size;
-extern size_t vmcoreinfo_max_size;
+extern u32 *vmcoreinfo_note;
 
 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
 			  void *data, size_t data_len);

diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index fbecbd0..92e165d 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h

@@ -2,6 +2,7 @@
 #define CEPH_CRUSH_CRUSH_H
 
 #ifdef __KERNEL__
+# include <linux/rbtree.h>
 # include <linux/types.h>
 #else
 # include "crush_compat.h"
@@ -137,6 +138,68 @@ struct crush_bucket {
 
 };
 
+/** @ingroup API
+ *
+ * Replacement weights for each item in a bucket. The size of the
+ * array must be exactly the size of the straw2 bucket, just as the
+ * item_weights array.
+ *
+ */
+struct crush_weight_set {
+	__u32 *weights; /*!< 16.16 fixed point weights
+                             in the same order as items */
+	__u32 size;     /*!< size of the __weights__ array */
+};
+
+/** @ingroup API
+ *
+ * Replacement weights and ids for a given straw2 bucket, for
+ * placement purposes.
+ *
+ * When crush_do_rule() chooses the Nth item from a straw2 bucket, the
+ * replacement weights found at __weight_set[N]__ are used instead of
+ * the weights from __item_weights__. If __N__ is greater than or equal to
+ * __weight_set_size__, the weights found at __weight_set_size-1__ are
+ * used instead. For instance if __weight_set__ is:
+ *
+ *    [ [ 0x10000, 0x20000 ],   // position 0
+ *      [ 0x20000, 0x40000 ] ]  // position 1
+ *
+ * choosing the 0th item will use position 0 weights [ 0x10000, 0x20000 ]
+ * choosing the 1st item will use position 1 weights [ 0x20000, 0x40000 ]
+ * choosing the 2nd item will use position 1 weights [ 0x20000, 0x40000 ]
+ * etc.
+ *
+ */
+struct crush_choose_arg {
+	__s32 *ids;            /*!< values to use instead of items */
+	__u32 ids_size;        /*!< size of the __ids__ array */
+	struct crush_weight_set *weight_set; /*!< weight replacements for
+                                                  a given position */
+	__u32 weight_set_size; /*!< size of the __weight_set__ array */
+};
+
+/** @ingroup API
+ *
+ * Replacement weights and ids for each bucket in the crushmap. The
+ * __size__ of the __args__ array must be exactly the same as the
+ * __map->max_buckets__.
+ *
+ * The __crush_choose_arg__ at index N will be used when choosing
+ * an item from the __map->buckets[N]__ bucket, provided it
+ * is a straw2 bucket.
+ *
+ */
+struct crush_choose_arg_map {
+#ifdef __KERNEL__
+	struct rb_node node;
+	u64 choose_args_index;
+#endif
+	struct crush_choose_arg *args; /*!< replacement for each bucket
+                                            in the crushmap */
+	__u32 size;                    /*!< size of the __args__ array */
+};
+
 struct crush_bucket_uniform {
 	struct crush_bucket h;
 	__u32 item_weight;  /* 16-bit fixed point; all items equally weighted */
@@ -236,6 +299,9 @@ struct crush_map {
 	__u32 allowed_bucket_algs;
 
 	__u32 *choose_tries;
+#else
+	/* CrushWrapper::choose_args */
+	struct rb_root choose_args;
 #endif
 };
 

diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
index c95e19e..141edab 100644
--- a/include/linux/crush/mapper.h
+++ b/include/linux/crush/mapper.h

@@ -11,11 +11,10 @@
 #include "crush.h"
 
 extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
-extern int crush_do_rule(const struct crush_map *map,
-			 int ruleno,
-			 int x, int *result, int result_max,
-			 const __u32 *weights, int weight_max,
-			 void *cwin);
+int crush_do_rule(const struct crush_map *map,
+		  int ruleno, int x, int *result, int result_max,
+		  const __u32 *weight, int weight_max,
+		  void *cwin, const struct crush_choose_arg *choose_args);
 
 /*
  * Returns the exact amount of workspace that will need to be used

diff --git a/include/linux/dax.h b/include/linux/dax.h
index 8f39db7..7948118 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h

@@ -154,11 +154,6 @@ static inline unsigned int dax_radix_order(void *entry)
 #endif
 int dax_pfn_mkwrite(struct vm_fault *vmf);
 
-static inline bool vma_is_dax(struct vm_area_struct *vma)
-{
-	return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
-}
-
 static inline bool dax_mapping(struct address_space *mapping)
 {
 	return mapping->host && IS_DAX(mapping->host);

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 025727b..c706eaa 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h

@@ -592,8 +592,8 @@ static inline struct inode *d_real_inode(const struct dentry *dentry)
 }
 
 struct name_snapshot {
-	const char *name;
-	char inline_name[DNAME_INLINE_LEN];
+	const unsigned char *name;
+	unsigned char inline_name[DNAME_INLINE_LEN];
 };
 void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *);
 void release_dentry_name_snapshot(struct name_snapshot *);

diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 6daf6d4..2f14ac7 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h

@@ -14,6 +14,7 @@
 #define _LINUX_EVENTPOLL_H
 
 #include <uapi/linux/eventpoll.h>
+#include <uapi/linux/kcmp.h>
 
 
 /* Forward declarations to avoid compiler errors */
@@ -22,6 +23,10 @@ struct file;
 
 #ifdef CONFIG_EPOLL
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff);
+#endif
+
 /* Used to initialize the epoll bits inside the "struct file" */
 static inline void eventpoll_init_file(struct file *file)
 {

diff --git a/include/linux/extable.h b/include/linux/extable.h
index 7effea4..28addad 100644
--- a/include/linux/extable.h
+++ b/include/linux/extable.h

@@ -2,13 +2,14 @@
 #define _LINUX_EXTABLE_H
 
 #include <linux/stddef.h>	/* for NULL */
+#include <linux/types.h>
 
 struct module;
 struct exception_table_entry;
 
 const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-	       const struct exception_table_entry *last,
+search_extable(const struct exception_table_entry *base,
+	       const size_t num,
 	       unsigned long value);
 void sort_extable(struct exception_table_entry *start,
 		  struct exception_table_entry *finish);

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0cfa471..976aaa1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h

@@ -18,6 +18,7 @@
 #include <linux/bug.h>
 #include <linux/mutex.h>
 #include <linux/rwsem.h>
+#include <linux/mm_types.h>
 #include <linux/capability.h>
 #include <linux/semaphore.h>
 #include <linux/fcntl.h>
@@ -1954,6 +1955,9 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
  *			wb stat updates to grab mapping->tree_lock.  See
  *			inode_switch_wb_work_fn() for details.
  *
+ * I_OVL_INUSE		Used by overlayfs to get exclusive ownership on upper
+ *			and work dirs among overlayfs mounts.
+ *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
  */
 #define I_DIRTY_SYNC		(1 << 0)
@@ -1974,6 +1978,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
 #define __I_DIRTY_TIME_EXPIRED	12
 #define I_DIRTY_TIME_EXPIRED	(1 << __I_DIRTY_TIME_EXPIRED)
 #define I_WB_SWITCH		(1 << 13)
+#define I_OVL_INUSE			(1 << 14)
 
 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
 #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
@@ -3127,6 +3132,11 @@ static inline bool io_is_direct(struct file *filp)
 	return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
 }
 
+static inline bool vma_is_dax(struct vm_area_struct *vma)
+{
+	return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
+}
+
 static inline int iocb_flags(struct file *file)
 {
 	int res = 0;

diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 3dff239..9ab3754 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h

@@ -12,6 +12,8 @@
 #ifndef _LINUX_FWNODE_H_
 #define _LINUX_FWNODE_H_
 
+#include <linux/types.h>
+
 enum fwnode_type {
 	FWNODE_INVALID = 0,
 	FWNODE_OF,
@@ -22,9 +24,12 @@ enum fwnode_type {
 	FWNODE_IRQCHIP
 };
 
+struct fwnode_operations;
+
 struct fwnode_handle {
 	enum fwnode_type type;
 	struct fwnode_handle *secondary;
+	const struct fwnode_operations *ops;
 };
 
 /**
@@ -39,4 +44,68 @@ struct fwnode_endpoint {
 	const struct fwnode_handle *local_fwnode;
 };
 
+/**
+ * struct fwnode_operations - Operations for fwnode interface
+ * @get: Get a reference to an fwnode.
+ * @put: Put a reference to an fwnode.
+ * @property_present: Return true if a property is present.
+ * @property_read_int_array: Read an array of integer properties. Return
+ *				 zero on success, a negative error code
+ *				 otherwise.
+ * @property_read_string_array: Read an array of string properties. Return zero
+ *				on success, a negative error code otherwise.
+ * @get_parent: Return the parent of an fwnode.
+ * @get_next_child_node: Return the next child node in an iteration.
+ * @get_named_child_node: Return a child node with a given name.
+ * @graph_get_next_endpoint: Return an endpoint node in an iteration.
+ * @graph_get_remote_endpoint: Return the remote endpoint node of a local
+ *			       endpoint node.
+ * @graph_get_port_parent: Return the parent node of a port node.
+ * @graph_parse_endpoint: Parse endpoint for port and endpoint id.
+ */
+struct fwnode_operations {
+	void (*get)(struct fwnode_handle *fwnode);
+	void (*put)(struct fwnode_handle *fwnode);
+	bool (*device_is_available)(struct fwnode_handle *fwnode);
+	bool (*property_present)(struct fwnode_handle *fwnode,
+				 const char *propname);
+	int (*property_read_int_array)(struct fwnode_handle *fwnode,
+				       const char *propname,
+				       unsigned int elem_size, void *val,
+				       size_t nval);
+	int (*property_read_string_array)(struct fwnode_handle *fwnode_handle,
+					  const char *propname,
+					  const char **val, size_t nval);
+	struct fwnode_handle *(*get_parent)(struct fwnode_handle *fwnode);
+	struct fwnode_handle *
+	(*get_next_child_node)(struct fwnode_handle *fwnode,
+			       struct fwnode_handle *child);
+	struct fwnode_handle *
+	(*get_named_child_node)(struct fwnode_handle *fwnode, const char *name);
+	struct fwnode_handle *
+	(*graph_get_next_endpoint)(struct fwnode_handle *fwnode,
+				   struct fwnode_handle *prev);
+	struct fwnode_handle *
+	(*graph_get_remote_endpoint)(struct fwnode_handle *fwnode);
+	struct fwnode_handle *
+	(*graph_get_port_parent)(struct fwnode_handle *fwnode);
+	int (*graph_parse_endpoint)(struct fwnode_handle *fwnode,
+				    struct fwnode_endpoint *endpoint);
+};
+
+#define fwnode_has_op(fwnode, op)				\
+	((fwnode) && (fwnode)->ops && (fwnode)->ops->op)
+#define fwnode_call_int_op(fwnode, op, ...)				\
+	(fwnode ? (fwnode_has_op(fwnode, op) ?				\
+		   (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : -ENXIO) : \
+	 -EINVAL)
+#define fwnode_call_ptr_op(fwnode, op, ...)		\
+	(fwnode_has_op(fwnode, op) ?			\
+	 (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : NULL)
+#define fwnode_call_void_op(fwnode, op, ...)				\
+	do {								\
+		if (fwnode_has_op(fwnode, op))				\
+			(fwnode)->ops->op(fwnode, ## __VA_ARGS__);	\
+	} while (false)
+
 #endif

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 4c6656f..bcfb9f7 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h

@@ -25,7 +25,7 @@ struct vm_area_struct;
 #define ___GFP_FS		0x80u
 #define ___GFP_COLD		0x100u
 #define ___GFP_NOWARN		0x200u
-#define ___GFP_REPEAT		0x400u
+#define ___GFP_RETRY_MAYFAIL	0x400u
 #define ___GFP_NOFAIL		0x800u
 #define ___GFP_NORETRY		0x1000u
 #define ___GFP_MEMALLOC		0x2000u
@@ -136,26 +136,56 @@ struct vm_area_struct;
  *
  * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
  *
- * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
- *   _might_ fail.  This depends upon the particular VM implementation.
+ * The default allocator behavior depends on the request size. We have a concept
+ * of so called costly allocations (with order > PAGE_ALLOC_COSTLY_ORDER).
+ * !costly allocations are too essential to fail so they are implicitly
+ * non-failing by default (with some exceptions like OOM victims might fail so
+ * the caller still has to check for failures) while costly requests try to be
+ * not disruptive and back off even without invoking the OOM killer.
+ * The following three modifiers might be used to override some of these
+ * implicit rules:
+ *
+ * __GFP_NORETRY: The VM implementation will try only very lightweight
+ *   memory direct reclaim to get some memory under memory pressure (thus
+ *   it can sleep). It will avoid disruptive actions like OOM killer. The
+ *   caller must handle the failure which is quite likely to happen under
+ *   heavy memory pressure. The flag is suitable when failure can easily be
+ *   handled at small cost, such as reduced throughput.
+ *
+ * __GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim
+ *   procedures that have previously failed if there is some indication
+ *   that progress has been made elsewhere.  It can wait for other
+ *   tasks to attempt high level approaches to freeing memory such as
+ *   compaction (which removes fragmentation) and page-out.
+ *   There is still a definite limit to the number of retries, but it is
+ *   a larger limit than with __GFP_NORETRY.
+ *   Allocations with this flag may fail, but only when there is
+ *   genuinely little unused memory. While these allocations do not
+ *   directly trigger the OOM killer, their failure indicates that
+ *   the system is likely to need to use the OOM killer soon.  The
+ *   caller must handle failure, but can reasonably do so by failing
+ *   a higher-level request, or completing it only in a much less
+ *   efficient manner.
+ *   If the allocation does fail, and the caller is in a position to
+ *   free some non-essential memory, doing so could benefit the system
+ *   as a whole.
  *
  * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
- *   cannot handle allocation failures. New users should be evaluated carefully
- *   (and the flag should be used only when there is no reasonable failure
- *   policy) but it is definitely preferable to use the flag rather than
- *   opencode endless loop around allocator.
- *
- * __GFP_NORETRY: The VM implementation must not retry indefinitely and will
- *   return NULL when direct reclaim and memory compaction have failed to allow
- *   the allocation to succeed.  The OOM killer is not called with the current
- *   implementation.
+ *   cannot handle allocation failures. The allocation could block
+ *   indefinitely but will never return with failure. Testing for
+ *   failure is pointless.
+ *   New users should be evaluated carefully (and the flag should be
+ *   used only when there is no reasonable failure policy) but it is
+ *   definitely preferable to use the flag rather than opencode endless
+ *   loop around allocator.
+ *   Using this flag for costly allocations is _highly_ discouraged.
  */
 #define __GFP_IO	((__force gfp_t)___GFP_IO)
 #define __GFP_FS	((__force gfp_t)___GFP_FS)
 #define __GFP_DIRECT_RECLAIM	((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
 #define __GFP_KSWAPD_RECLAIM	((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
 #define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
-#define __GFP_REPEAT	((__force gfp_t)___GFP_REPEAT)
+#define __GFP_RETRY_MAYFAIL	((__force gfp_t)___GFP_RETRY_MAYFAIL)
 #define __GFP_NOFAIL	((__force gfp_t)___GFP_NOFAIL)
 #define __GFP_NORETRY	((__force gfp_t)___GFP_NORETRY)
 

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 5be325d..5006f9b 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h

@@ -34,6 +34,7 @@
 #include <linux/workqueue.h>
 #include <linux/input.h>
 #include <linux/semaphore.h>
+#include <linux/mutex.h>
 #include <linux/power_supply.h>
 #include <uapi/linux/hid.h>
 
@@ -182,6 +183,11 @@ struct hid_item {
 #define HID_GD_KEYBOARD		0x00010006
 #define HID_GD_KEYPAD		0x00010007
 #define HID_GD_MULTIAXIS	0x00010008
+/*
+ * Microsoft Win8 Wireless Radio Controls extensions CA, see:
+ * http://www.usb.org/developers/hidpage/HUTRR40RadioHIDUsagesFinal.pdf
+ */
+#define HID_GD_WIRELESS_RADIO_CTLS	0x0001000c
 #define HID_GD_X		0x00010030
 #define HID_GD_Y		0x00010031
 #define HID_GD_Z		0x00010032
@@ -210,6 +216,10 @@ struct hid_item {
 #define HID_GD_DOWN		0x00010091
 #define HID_GD_RIGHT		0x00010092
 #define HID_GD_LEFT		0x00010093
+/* Microsoft Win8 Wireless Radio Controls CA usage codes */
+#define HID_GD_RFKILL_BTN	0x000100c6
+#define HID_GD_RFKILL_LED	0x000100c7
+#define HID_GD_RFKILL_SWITCH	0x000100c8
 
 #define HID_DC_BATTERYSTRENGTH	0x00060020
 
@@ -520,7 +530,10 @@ struct hid_device {							/* device report descriptor */
 	struct semaphore driver_input_lock;				/* protects the current driver */
 	struct device dev;						/* device */
 	struct hid_driver *driver;
+
 	struct hid_ll_driver *ll_driver;
+	struct mutex ll_open_lock;
+	unsigned int ll_open_count;
 
 #ifdef CONFIG_HID_BATTERY_STRENGTH
 	/*
@@ -544,7 +557,6 @@ struct hid_device {							/* device report descriptor */
 	void *hiddev;							/* The hiddev structure */
 	void *hidraw;
 
-	int open;							/* is the device open by anyone? */
 	char name[128];							/* Device name */
 	char phys[64];							/* Device physical location */
 	char uniq[64];							/* Device unique identifier (serial #) */
@@ -937,69 +949,11 @@ static inline int __must_check hid_parse(struct hid_device *hdev)
 	return hid_open_report(hdev);
 }
 
-/**
- * hid_hw_start - start underlaying HW
- *
- * @hdev: hid device
- * @connect_mask: which outputs to connect, see HID_CONNECT_*
- *
- * Call this in probe function *after* hid_parse. This will setup HW buffers
- * and start the device (if not deffered to device open). hid_hw_stop must be
- * called if this was successful.
- */
-static inline int __must_check hid_hw_start(struct hid_device *hdev,
-		unsigned int connect_mask)
-{
-	int ret = hdev->ll_driver->start(hdev);
-	if (ret || !connect_mask)
-		return ret;
-	ret = hid_connect(hdev, connect_mask);
-	if (ret)
-		hdev->ll_driver->stop(hdev);
-	return ret;
-}
-
-/**
- * hid_hw_stop - stop underlaying HW
- *
- * @hdev: hid device
- *
- * This is usually called from remove function or from probe when something
- * failed and hid_hw_start was called already.
- */
-static inline void hid_hw_stop(struct hid_device *hdev)
-{
-	hid_disconnect(hdev);
-	hdev->ll_driver->stop(hdev);
-}
-
-/**
- * hid_hw_open - signal underlaying HW to start delivering events
- *
- * @hdev: hid device
- *
- * Tell underlying HW to start delivering events from the device.
- * This function should be called sometime after successful call
- * to hid_hiw_start().
- */
-static inline int __must_check hid_hw_open(struct hid_device *hdev)
-{
-	return hdev->ll_driver->open(hdev);
-}
-
-/**
- * hid_hw_close - signal underlaying HW to stop delivering events
- *
- * @hdev: hid device
- *
- * This function indicates that we are not interested in the events
- * from this device anymore. Delivery of events may or may not stop,
- * depending on the number of users still outstanding.
- */
-static inline void hid_hw_close(struct hid_device *hdev)
-{
-	hdev->ll_driver->close(hdev);
-}
+int __must_check hid_hw_start(struct hid_device *hdev,
+			      unsigned int connect_mask);
+void hid_hw_stop(struct hid_device *hdev);
+int __must_check hid_hw_open(struct hid_device *hdev);
+void hid_hw_close(struct hid_device *hdev);
 
 /**
  * hid_hw_power - requests underlying HW to go into given power mode

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index d3b3e8f..ee69634 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h

@@ -1,6 +1,10 @@
 #ifndef _LINUX_HUGE_MM_H
 #define _LINUX_HUGE_MM_H
 
+#include <linux/sched/coredump.h>
+
+#include <linux/fs.h> /* only for vma_is_dax() */
+
 extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf);
 extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
@@ -85,14 +89,32 @@ extern struct kobj_attribute shmem_enabled_attr;
 
 extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 
-#define transparent_hugepage_enabled(__vma)				\
-	((transparent_hugepage_flags &					\
-	  (1<<TRANSPARENT_HUGEPAGE_FLAG) ||				\
-	  (transparent_hugepage_flags &					\
-	   (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) &&			\
-	   ((__vma)->vm_flags & VM_HUGEPAGE))) &&			\
-	 !((__vma)->vm_flags & VM_NOHUGEPAGE) &&			\
-	 !is_vma_temporary_stack(__vma))
+extern unsigned long transparent_hugepage_flags;
+
+static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_NOHUGEPAGE)
+		return false;
+
+	if (is_vma_temporary_stack(vma))
+		return false;
+
+	if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
+		return false;
+
+	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
+		return true;
+
+	if (vma_is_dax(vma))
+		return true;
+
+	if (transparent_hugepage_flags &
+				(1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
+		return !!(vma->vm_flags & VM_HUGEPAGE);
+
+	return false;
+}
+
 #define transparent_hugepage_use_zero_page()				\
 	(transparent_hugepage_flags &					\
 	 (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
@@ -104,8 +126,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 #define transparent_hugepage_debug_cow() 0
 #endif /* CONFIG_DEBUG_VM */
 
-extern unsigned long transparent_hugepage_flags;
-
 extern unsigned long thp_get_unmapped_area(struct file *filp,
 		unsigned long addr, unsigned long len, unsigned long pgoff,
 		unsigned long flags);
@@ -224,7 +244,10 @@ void mm_put_huge_zero_page(struct mm_struct *mm);
 
 #define hpage_nr_pages(x) 1
 
-#define transparent_hugepage_enabled(__vma) 0
+static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
+{
+	return false;
+}
 
 static inline void prep_transhuge_page(struct page *page) {}
 

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 46bfb70..8d9fe13 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h

@@ -116,7 +116,6 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to,
 						vm_flags_t vm_flags);
 long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
 						long freed);
-int dequeue_hwpoisoned_huge_page(struct page *page);
 bool isolate_huge_page(struct page *page, struct list_head *list);
 void putback_active_hugepage(struct page *page);
 void free_huge_page(struct page *page);
@@ -192,10 +191,6 @@ static inline void hugetlb_show_meminfo(void)
 #define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
 				src_addr, pagep)	({ BUG(); 0; })
 #define huge_pte_offset(mm, address, sz)	0
-static inline int dequeue_hwpoisoned_huge_page(struct page *page)
-{
-	return 0;
-}
 
 static inline bool isolate_huge_page(struct page *page, struct list_head *list)
 {
@@ -354,6 +349,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 struct page *alloc_huge_page_node(struct hstate *h, int nid);
 struct page *alloc_huge_page_noerr(struct vm_area_struct *vma,
 				unsigned long addr, int avoid_reserve);
+struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
+				nodemask_t *nmask);
 int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 			pgoff_t idx);
 
@@ -472,6 +469,7 @@ static inline pgoff_t basepage_index(struct page *page)
 	return __basepage_index(page);
 }
 
+extern int dissolve_free_huge_page(struct page *page);
 extern int dissolve_free_huge_pages(unsigned long start_pfn,
 				    unsigned long end_pfn);
 static inline bool hugepage_migration_supported(struct hstate *h)
@@ -528,6 +526,7 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
 struct hstate {};
 #define alloc_huge_page(v, a, r) NULL
 #define alloc_huge_page_node(h, nid) NULL
+#define alloc_huge_page_nodemask(h, preferred_nid, nmask) NULL
 #define alloc_huge_page_noerr(v, a, r) NULL
 #define alloc_bootmem_huge_page(h) NULL
 #define hstate_file(f) NULL
@@ -550,15 +549,37 @@ static inline unsigned int pages_per_huge_page(struct hstate *h)
 {
 	return 1;
 }
-#define hstate_index_to_shift(index) 0
-#define hstate_index(h) 0
+
+static inline unsigned hstate_index_to_shift(unsigned index)
+{
+	return 0;
+}
+
+static inline int hstate_index(struct hstate *h)
+{
+	return 0;
+}
 
 static inline pgoff_t basepage_index(struct page *page)
 {
 	return page->index;
 }
-#define dissolve_free_huge_pages(s, e)	0
-#define hugepage_migration_supported(h)	false
+
+static inline int dissolve_free_huge_page(struct page *page)
+{
+	return 0;
+}
+
+static inline int dissolve_free_huge_pages(unsigned long start_pfn,
+					   unsigned long end_pfn)
+{
+	return 0;
+}
+
+static inline bool hugepage_migration_supported(struct hstate *h)
+{
+	return false;
+}
 
 static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
 					   struct mm_struct *mm, pte_t *pte)

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 72d0ece..00ca5b8 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h

@@ -295,6 +295,8 @@ static inline int i2c_slave_event(struct i2c_client *client,
 {
 	return client->slave_cb(client, event, val);
 }
+#else
+static inline bool i2c_detect_slave_mode(struct device *dev) { return false; }
 #endif
 
 /**

diff --git a/include/linux/i2c/i2c-sh_mobile.h b/include/linux/i2c/i2c-sh_mobile.h
deleted file mode 100644
index 06e3089..0000000
--- a/include/linux/i2c/i2c-sh_mobile.h
+++ /dev/null

@@ -1,11 +0,0 @@
-#ifndef __I2C_SH_MOBILE_H__
-#define __I2C_SH_MOBILE_H__
-
-#include <linux/platform_device.h>
-
-struct i2c_sh_mobile_platform_data {
-	unsigned long bus_speed;
-	unsigned int clks_per_count;
-};
-
-#endif /* __I2C_SH_MOBILE_H__ */

diff --git a/include/linux/initrd.h b/include/linux/initrd.h
index 55289d2..bc67b76 100644
--- a/include/linux/initrd.h
+++ b/include/linux/initrd.h

@@ -10,6 +10,9 @@ extern int rd_prompt;
 /* starting block # of image */
 extern int rd_image_start;
 
+/* size of a single RAM disk */
+extern unsigned long rd_size;
+
 /* 1 if it is not an error if initrd_start < memory_start */
 extern int initrd_below_start_ok;
 

diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 3c25794..99bc5b3 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h

@@ -102,6 +102,21 @@ extern int intel_svm_bind_mm(struct device *dev, int *pasid, int flags,
  */
 extern int intel_svm_unbind_mm(struct device *dev, int pasid);
 
+/**
+ * intel_svm_is_pasid_valid() - check if pasid is valid
+ * @dev:	Device for which PASID was allocated
+ * @pasid:	PASID value to be checked
+ *
+ * This function checks if the specified pasid is still valid. A
+ * valid pasid means the backing mm is still having a valid user.
+ * For kernel callers init_mm is always valid. For any other mm, it is
+ * valid if mm->mm_users is non-zero.
+ *
+ * Returns -EINVAL if the pasid is invalid, 0 if the pasid ref count is
+ * invalid, 1 if the pasid is valid.
+ */
+extern int intel_svm_is_pasid_valid(struct device *dev, int pasid);
+
 #else /* CONFIG_INTEL_IOMMU_SVM */
 
 static inline int intel_svm_bind_mm(struct device *dev, int *pasid,
@@ -114,6 +129,11 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid)
 {
 	BUG();
 }
+
+static inline int intel_svm_is_pasid_valid(struct device *dev, int pasid)
+{
+	return -EINVAL; /* SVM support compiled out: no PASID is valid */
+}
 #endif /* CONFIG_INTEL_IOMMU_SVM */
 
 #define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL, 0, NULL))

diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 69f4e94..f64dc6c 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h

@@ -84,6 +84,10 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops);
 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		loff_t start, loff_t len, const struct iomap_ops *ops);
+loff_t iomap_seek_hole(struct inode *inode, loff_t offset,
+		const struct iomap_ops *ops);
+loff_t iomap_seek_data(struct inode *inode, loff_t offset,
+		const struct iomap_ops *ops);
 
 /*
  * Flags for direct I/O ->end_io:

diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index 71fd92d..5591f055 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h

@@ -20,6 +20,9 @@ struct kern_ipc_perm {
 	umode_t		mode;
 	unsigned long	seq;
 	void		*security;
+
+	struct rcu_head rcu;
+	atomic_t refcount;
 } ____cacheline_aligned_in_smp;
 
 #endif /* _LINUX_IPC_H */

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 1c91f26..bd6d96c 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h

@@ -11,6 +11,7 @@
 #include <linux/log2.h>
 #include <linux/typecheck.h>
 #include <linux/printk.h>
+#include <linux/build_bug.h>
 #include <asm/byteorder.h>
 #include <uapi/linux/kernel.h>
 
@@ -854,9 +855,12 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
  * @member:	the name of the member within the struct.
  *
  */
-#define container_of(ptr, type, member) ({			\
-	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
-	(type *)( (char *)__mptr - offsetof(type,member) );})
+#define container_of(ptr, type, member) ({				\
+	void *__mptr = (void *)(ptr);					\
+	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
+			 !__same_type(*(ptr), void),			\
+			 "pointer type mismatch in container_of()");	\
+	((type *)(__mptr - offsetof(type, member))); })
 
 /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 6588841..dd056fa 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h

@@ -172,6 +172,7 @@ struct kimage {
 	unsigned long start;
 	struct page *control_code_page;
 	struct page *swap_page;
+	void *vmcoreinfo_data_copy; /* located in the crash memory */
 
 	unsigned long nr_segments;
 	struct kexec_segment segment[KEXEC_SEGMENT_MAX];
@@ -241,6 +242,7 @@ extern void crash_kexec(struct pt_regs *);
 int kexec_should_crash(struct task_struct *);
 int kexec_crash_loaded(void);
 void crash_save_cpu(struct pt_regs *regs, int cpu);
+extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
 
 extern struct kimage *kexec_image;
 extern struct kimage *kexec_crash_image;

diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 5d9a400..f0d7335 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h

@@ -48,7 +48,8 @@ static inline int khugepaged_enter(struct vm_area_struct *vma,
 	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
 		if ((khugepaged_always() ||
 		     (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
-		    !(vm_flags & VM_NOHUGEPAGE))
+		    !(vm_flags & VM_NOHUGEPAGE) &&
+		    !test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
 			if (__khugepaged_enter(vma->vm_mm))
 				return -ENOMEM;
 	return 0;

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index cb0ba9f..fa7fd03 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h

@@ -44,6 +44,7 @@ struct list_lru_node {
 	/* for cgroup aware lrus points to per cgroup lists, otherwise NULL */
 	struct list_lru_memcg	*memcg_lrus;
 #endif
+	long nr_items;
 } ____cacheline_aligned_in_smp;
 
 struct list_lru {

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 41f7b6a..3eca677 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h

@@ -192,9 +192,9 @@ struct nlm_block {
  * Global variables
  */
 extern const struct rpc_program	nlm_program;
-extern struct svc_procedure	nlmsvc_procedures[];
+extern const struct svc_procedure nlmsvc_procedures[];
 #ifdef CONFIG_LOCKD_V4
-extern struct svc_procedure	nlmsvc_procedures4[];
+extern const struct svc_procedure nlmsvc_procedures4[];
 #endif
 extern int			nlmsvc_grace_period;
 extern unsigned long		nlmsvc_timeout;

diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index d39ed1c..7acbecc 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h

@@ -95,19 +95,19 @@ struct nlm_reboot {
  */
 #define NLMSVC_XDRSIZE		sizeof(struct nlm_args)
 
-int	nlmsvc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlmsvc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *);
-int	nlmsvc_decode_void(struct svc_rqst *, __be32 *, void *);
-int	nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlmsvc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlmsvc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *);
+int	nlmsvc_decode_testargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_encode_res(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_res(struct svc_rqst *, __be32 *);
+int	nlmsvc_encode_void(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_void(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_notify(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
 int	nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *);
 int	nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *);

diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
index e58c88b..bf16456 100644
--- a/include/linux/lockd/xdr4.h
+++ b/include/linux/lockd/xdr4.h

@@ -23,19 +23,19 @@
 
 
 
-int	nlm4svc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlm4svc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *);
-int	nlm4svc_decode_void(struct svc_rqst *, __be32 *, void *);
-int	nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *);
-int	nlm4svc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *);
-int	nlm4svc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *);
+int	nlm4svc_decode_testargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_encode_res(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_res(struct svc_rqst *, __be32 *);
+int	nlm4svc_encode_void(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_void(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *);
+int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_notify(struct svc_rqst *, __be32 *);
+int	nlm4svc_decode_reboot(struct svc_rqst *, __be32 *);
 /*
 int	nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *);
 int	nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *);

diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 28baee6..4e887ba 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h

@@ -149,6 +149,7 @@ struct cros_ec_device {
 
 	struct ec_response_get_next_event event_data;
 	int event_size;
+	u32 host_event_wake_mask;
 };
 
 /**
@@ -172,6 +173,8 @@ struct cros_ec_platform {
 	u16 cmd_offset;
 };
 
+struct cros_ec_debugfs;
+
 /*
  * struct cros_ec_dev - ChromeOS EC device entry point
  *
@@ -179,6 +182,7 @@ struct cros_ec_platform {
  * @cdev: Character device structure in /dev
  * @ec_dev: cros_ec_device structure to talk to the physical device
  * @dev: pointer to the platform device
+ * @debug_info: cros_ec_debugfs structure for debugging information
  * @cmd_offset: offset to apply for each command.
  */
 struct cros_ec_dev {
@@ -186,6 +190,7 @@ struct cros_ec_dev {
 	struct cdev cdev;
 	struct cros_ec_device *ec_dev;
 	struct device *dev;
+	struct cros_ec_debugfs *debug_info;
 	u16 cmd_offset;
 	u32 features[2];
 };
@@ -295,10 +300,22 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev);
  * cros_ec_get_next_event -  Fetch next event from the ChromeOS EC
  *
  * @ec_dev: Device to fetch event from
+ * @wake_event: Pointer to a bool set to true upon return if the event might be
+ *              treated as a wake event. Ignored if null.
  *
  * Returns: 0 on success, Linux error number on failure
  */
-int cros_ec_get_next_event(struct cros_ec_device *ec_dev);
+int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event);
+
+/**
+ * cros_ec_get_host_event - Return a mask of events set by the EC.
+ *
+ * When MKBP is supported and the EC raises an interrupt,
+ * we collect the events raised and call the functions in the ec notifier.
+ *
+ * This function is a helper to know which events are raised.
+ */
+u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev);
 
 /* sysfs stuff */
 extern struct attribute_group cros_ec_attr_group;

diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index c93e7e0..190c8f4 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h

@@ -625,6 +625,10 @@ struct ec_params_get_cmd_versions {
 	uint8_t cmd;      /* Command to check */
 } __packed;
 
+struct ec_params_get_cmd_versions_v1 {
+	uint16_t cmd;     /* Command to check */
+} __packed;
+
 struct ec_response_get_cmd_versions {
 	/*
 	 * Mask of supported versions; use EC_VER_MASK() to compare with a
@@ -1158,13 +1162,20 @@ struct lightbar_params_v1 {
 	struct rgb_s color[8];			/* 0-3 are Google colors */
 } __packed;
 
+/* Lightbar program */
+#define EC_LB_PROG_LEN 192
+struct lightbar_program {
+	uint8_t size;
+	uint8_t data[EC_LB_PROG_LEN];
+};
+
 struct ec_params_lightbar {
 	uint8_t cmd;		      /* Command (see enum lightbar_command) */
 	union {
 		struct {
 			/* no args */
 		} dump, off, on, init, get_seq, get_params_v0, get_params_v1,
-			version, get_brightness, get_demo;
+			version, get_brightness, get_demo, suspend, resume;
 
 		struct {
 			uint8_t num;
@@ -1182,8 +1193,13 @@ struct ec_params_lightbar {
 			uint8_t led;
 		} get_rgb;
 
+		struct {
+			uint8_t enable;
+		} manual_suspend_ctrl;
+
 		struct lightbar_params_v0 set_params_v0;
 		struct lightbar_params_v1 set_params_v1;
+		struct lightbar_program set_program;
 	};
 } __packed;
 
@@ -1216,7 +1232,8 @@ struct ec_response_lightbar {
 		struct {
 			/* no return params */
 		} off, on, init, set_brightness, seq, reg, set_rgb,
-			demo, set_params_v0, set_params_v1;
+			demo, set_params_v0, set_params_v1,
+			set_program, manual_suspend_ctrl, suspend, resume;
 	};
 } __packed;
 
@@ -1240,6 +1257,10 @@ enum lightbar_command {
 	LIGHTBAR_CMD_GET_DEMO = 15,
 	LIGHTBAR_CMD_GET_PARAMS_V1 = 16,
 	LIGHTBAR_CMD_SET_PARAMS_V1 = 17,
+	LIGHTBAR_CMD_SET_PROGRAM = 18,
+	LIGHTBAR_CMD_MANUAL_SUSPEND_CTRL = 19,
+	LIGHTBAR_CMD_SUSPEND = 20,
+	LIGHTBAR_CMD_RESUME = 21,
 	LIGHTBAR_NUM_CMDS
 };
 
@@ -2285,13 +2306,28 @@ struct ec_params_charge_control {
 #define EC_CMD_CONSOLE_SNAPSHOT 0x97
 
 /*
- * Read next chunk of data from saved snapshot.
+ * Read data from the saved snapshot. If the subcmd parameter is
+ * CONSOLE_READ_NEXT, this will return data starting from the beginning of
+ * the latest snapshot. If it is CONSOLE_READ_RECENT, it will start from the
+ * end of the previous snapshot.
+ *
+ * The params are only looked at in version >= 1 of this command. Prior
+ * versions will just default to CONSOLE_READ_NEXT behavior.
  *
  * Response is null-terminated string.  Empty string, if there is no more
  * remaining output.
  */
 #define EC_CMD_CONSOLE_READ 0x98
 
+enum ec_console_read_subcmd {
+	CONSOLE_READ_NEXT = 0,
+	CONSOLE_READ_RECENT
+};
+
+struct ec_params_console_read_v1 {
+	uint8_t subcmd; /* enum ec_console_read_subcmd */
+} __packed;
+
 /*****************************************************************************/
 
 /*

diff --git a/include/linux/mfd/cros_ec_lpc_mec.h b/include/linux/mfd/cros_ec_lpc_mec.h
new file mode 100644
index 0000000..176496d
--- /dev/null
+++ b/include/linux/mfd/cros_ec_lpc_mec.h

@@ -0,0 +1,90 @@
+/*
+ * cros_ec_lpc_mec - LPC variant I/O for Microchip EC
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#ifndef __LINUX_MFD_CROS_EC_MEC_H
+#define __LINUX_MFD_CROS_EC_MEC_H
+
+#include <linux/mfd/cros_ec_commands.h>
+
+enum cros_ec_lpc_mec_emi_access_mode {
+	/* 8-bit access */
+	ACCESS_TYPE_BYTE = 0x0,
+	/* 16-bit access */
+	ACCESS_TYPE_WORD = 0x1,
+	/* 32-bit access */
+	ACCESS_TYPE_LONG = 0x2,
+	/*
+	 * 32-bit access, read or write of MEC_EMI_EC_DATA_B3 causes the
+	 * EC data register to be incremented.
+	 */
+	ACCESS_TYPE_LONG_AUTO_INCREMENT = 0x3,
+};
+
+enum cros_ec_lpc_mec_io_type {
+	MEC_IO_READ,
+	MEC_IO_WRITE,
+};
+
+/* Access IO ranges 0x800 thru 0x9ff using EMI interface instead of LPC */
+#define MEC_EMI_RANGE_START EC_HOST_CMD_REGION0
+#define MEC_EMI_RANGE_END   (EC_LPC_ADDR_MEMMAP + EC_MEMMAP_SIZE)
+
+/* EMI registers are relative to base */
+#define MEC_EMI_BASE 0x800
+#define MEC_EMI_HOST_TO_EC (MEC_EMI_BASE + 0)
+#define MEC_EMI_EC_TO_HOST (MEC_EMI_BASE + 1)
+#define MEC_EMI_EC_ADDRESS_B0 (MEC_EMI_BASE + 2)
+#define MEC_EMI_EC_ADDRESS_B1 (MEC_EMI_BASE + 3)
+#define MEC_EMI_EC_DATA_B0 (MEC_EMI_BASE + 4)
+#define MEC_EMI_EC_DATA_B1 (MEC_EMI_BASE + 5)
+#define MEC_EMI_EC_DATA_B2 (MEC_EMI_BASE + 6)
+#define MEC_EMI_EC_DATA_B3 (MEC_EMI_BASE + 7)
+
+/*
+ * cros_ec_lpc_mec_init
+ *
+ * Initialize MEC I/O.
+ */
+void cros_ec_lpc_mec_init(void);
+
+/*
+ * cros_ec_lpc_mec_destroy
+ *
+ * Cleanup MEC I/O.
+ */
+void cros_ec_lpc_mec_destroy(void);
+
+/**
+ * cros_ec_lpc_io_bytes_mec - Read / write bytes to MEC EMI port
+ *
+ * @io_type: MEC_IO_READ or MEC_IO_WRITE, depending on request
+ * @offset:  Base read / write address
+ * @length:  Number of bytes to read / write
+ * @buf:     Destination / source buffer
+ *
+ * @return 8-bit checksum of all bytes read / written
+ */
+u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type io_type,
+			    unsigned int offset, unsigned int length, u8 *buf);
+
+#endif /* __LINUX_MFD_CROS_EC_MEC_H */

diff --git a/include/linux/mfd/cros_ec_lpc_reg.h b/include/linux/mfd/cros_ec_lpc_reg.h
new file mode 100644
index 0000000..5560bef
--- /dev/null
+++ b/include/linux/mfd/cros_ec_lpc_reg.h

@@ -0,0 +1,61 @@
+/*
+ * cros_ec_lpc_reg - LPC access to the Chrome OS Embedded Controller
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#ifndef __LINUX_MFD_CROS_EC_REG_H
+#define __LINUX_MFD_CROS_EC_REG_H
+
+/**
+ * cros_ec_lpc_read_bytes - Read bytes from a given LPC-mapped address.
+ * Returns 8-bit checksum of all bytes read.
+ *
+ * @offset: Base read address
+ * @length: Number of bytes to read
+ * @dest: Destination buffer
+ */
+u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest);
+
+/**
+ * cros_ec_lpc_write_bytes - Write bytes to a given LPC-mapped address.
+ * Returns 8-bit checksum of all bytes written.
+ *
+ * @offset: Base write address
+ * @length: Number of bytes to write
+ * @msg: Write data buffer
+ */
+u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg);
+
+/**
+ * cros_ec_lpc_reg_init
+ *
+ * Initialize register I/O.
+ */
+void cros_ec_lpc_reg_init(void);
+
+/**
+ * cros_ec_lpc_reg_destroy
+ *
+ * Cleanup reg I/O.
+ */
+void cros_ec_lpc_reg_destroy(void);
+
+#endif /* __LINUX_MFD_CROS_EC_REG_H */

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 48e2484..3e0d405 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h

@@ -4,6 +4,7 @@
 #include <linux/mm.h>
 #include <linux/mempolicy.h>
 #include <linux/migrate_mode.h>
+#include <linux/hugetlb.h>
 
 typedef struct page *new_page_t(struct page *page, unsigned long private,
 				int **reason);
@@ -30,6 +31,21 @@ enum migrate_reason {
 /* In mm/debug.c; also keep sync with include/trace/events/migrate.h */
 extern char *migrate_reason_names[MR_TYPES];
 
+static inline struct page *new_page_nodemask(struct page *page,
+				int preferred_nid, nodemask_t *nodemask)
+{
+	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL;
+
+	if (PageHuge(page))
+		return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
+				preferred_nid, nodemask);
+
+	if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
+		gfp_mask |= __GFP_HIGHMEM;
+
+	return __alloc_pages_nodemask(gfp_mask, 0, preferred_nid, nodemask);
+}
+
 #ifdef CONFIG_MIGRATION
 
 extern void putback_movable_pages(struct list_head *l);

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7e8f100..fc14b8b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h

@@ -603,12 +603,9 @@ extern struct page *mem_map;
 #endif
 
 /*
- * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
- * (mostly NUMA machines?) to denote a higher-level memory zone than the
- * zone denotes.
- *
  * On NUMA machines, each NUMA node would have a pg_data_t to describe
- * it's memory layout.
+ * its memory layout. On UMA machines there is a single pglist_data which
+ * describes the whole memory.
  *
  * Memory statistics and page replacement data structures are maintained on a
  * per-zone basis.
@@ -1058,6 +1055,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
 	!defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
 static inline unsigned long early_pfn_to_nid(unsigned long pfn)
 {
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA));
 	return 0;
 }
 #endif

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index de0d889..892148c 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h

@@ -107,6 +107,8 @@ int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
 #define NAND_STATUS_READY	0x40
 #define NAND_STATUS_WP		0x80
 
+#define NAND_DATA_IFACE_CHECK_ONLY	-1
+
 /*
  * Constants for ECC_MODES
  */
@@ -116,6 +118,7 @@ typedef enum {
 	NAND_ECC_HW,
 	NAND_ECC_HW_SYNDROME,
 	NAND_ECC_HW_OOB_FIRST,
+	NAND_ECC_ON_DIE,
 } nand_ecc_modes_t;
 
 enum nand_ecc_algo {
@@ -257,6 +260,8 @@ struct nand_chip;
 
 /* Vendor-specific feature address (Micron) */
 #define ONFI_FEATURE_ADDR_READ_RETRY	0x89
+#define ONFI_FEATURE_ON_DIE_ECC		0x90
+#define   ONFI_FEATURE_ON_DIE_ECC_EN	BIT(3)
 
 /* ONFI subfeature parameters length */
 #define ONFI_SUBFEATURE_PARAM_LEN	4
@@ -477,6 +482,44 @@ static inline void nand_hw_control_init(struct nand_hw_control *nfc)
 }
 
 /**
+ * struct nand_ecc_step_info - ECC step information of ECC engine
+ * @stepsize: data bytes per ECC step
+ * @strengths: array of supported strengths
+ * @nstrengths: number of supported strengths
+ */
+struct nand_ecc_step_info {
+	int stepsize;
+	const int *strengths;
+	int nstrengths;
+};
+
+/**
+ * struct nand_ecc_caps - capability of ECC engine
+ * @stepinfos: array of ECC step information
+ * @nstepinfos: number of ECC step information
+ * @calc_ecc_bytes: driver's hook to calculate ECC bytes per step
+ */
+struct nand_ecc_caps {
+	const struct nand_ecc_step_info *stepinfos;
+	int nstepinfos;
+	int (*calc_ecc_bytes)(int step_size, int strength);
+};
+
+/* a shorthand to generate struct nand_ecc_caps with only one ECC stepsize */
+#define NAND_ECC_CAPS_SINGLE(__name, __calc, __step, ...)	\
+static const int __name##_strengths[] = { __VA_ARGS__ };	\
+static const struct nand_ecc_step_info __name##_stepinfo = {	\
+	.stepsize = __step,					\
+	.strengths = __name##_strengths,			\
+	.nstrengths = ARRAY_SIZE(__name##_strengths),		\
+};								\
+static const struct nand_ecc_caps __name = {			\
+	.stepinfos = &__name##_stepinfo,			\
+	.nstepinfos = 1,					\
+	.calc_ecc_bytes = __calc,				\
+}
+
+/**
  * struct nand_ecc_ctrl - Control structure for ECC
  * @mode:	ECC mode
  * @algo:	ECC algorithm
@@ -815,7 +858,10 @@ struct nand_manufacturer_ops {
  * @read_retries:	[INTERN] the number of read retry modes supported
  * @onfi_set_features:	[REPLACEABLE] set the features for ONFI nand
  * @onfi_get_features:	[REPLACEABLE] get the features for ONFI nand
- * @setup_data_interface: [OPTIONAL] setup the data interface and timing
+ * @setup_data_interface: [OPTIONAL] setup the data interface and timing. If
+ *			  chipnr is set to %NAND_DATA_IFACE_CHECK_ONLY this
+ *			  means the configuration should not be applied but
+ *			  only checked.
  * @bbt:		[INTERN] bad block table pointer
  * @bbt_td:		[REPLACEABLE] bad block table descriptor for flash
  *			lookup.
@@ -826,9 +872,6 @@ struct nand_manufacturer_ops {
  *			structure which is shared among multiple independent
  *			devices.
  * @priv:		[OPTIONAL] pointer to private chip data
- * @errstat:		[OPTIONAL] hardware specific function to perform
- *			additional error status checks (determine if errors are
- *			correctable).
  * @manufacturer:	[INTERN] Contains manufacturer information
  */
 
@@ -852,16 +895,13 @@ struct nand_chip {
 	int(*waitfunc)(struct mtd_info *mtd, struct nand_chip *this);
 	int (*erase)(struct mtd_info *mtd, int page);
 	int (*scan_bbt)(struct mtd_info *mtd);
-	int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state,
-			int status, int page);
 	int (*onfi_set_features)(struct mtd_info *mtd, struct nand_chip *chip,
 			int feature_addr, uint8_t *subfeature_para);
 	int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip,
 			int feature_addr, uint8_t *subfeature_para);
 	int (*setup_read_retry)(struct mtd_info *mtd, int retry_mode);
-	int (*setup_data_interface)(struct mtd_info *mtd,
-				    const struct nand_data_interface *conf,
-				    bool check_only);
+	int (*setup_data_interface)(struct mtd_info *mtd, int chipnr,
+				    const struct nand_data_interface *conf);
 
 
 	int chip_delay;
@@ -1244,6 +1284,15 @@ int nand_check_erased_ecc_chunk(void *data, int datalen,
 				void *extraoob, int extraooblen,
 				int threshold);
 
+int nand_check_ecc_caps(struct nand_chip *chip,
+			const struct nand_ecc_caps *caps, int oobavail);
+
+int nand_match_ecc_req(struct nand_chip *chip,
+		       const struct nand_ecc_caps *caps,  int oobavail);
+
+int nand_maximize_ecc(struct nand_chip *chip,
+		      const struct nand_ecc_caps *caps, int oobavail);
+
 /* Default write_oob implementation */
 int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page);
 
@@ -1258,6 +1307,19 @@ int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page);
 int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
 			   int page);
 
+/* Stub used by drivers that do not support GET/SET FEATURES operations */
+int nand_onfi_get_set_features_notsupp(struct mtd_info *mtd,
+				       struct nand_chip *chip, int addr,
+				       u8 *subfeature_param);
+
+/* Default read_page_raw implementation */
+int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+		       uint8_t *buf, int oob_required, int page);
+
+/* Default write_page_raw implementation */
+int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+			const uint8_t *buf, int oob_required, int page);
+
 /* Reset and initialize a NAND device */
 int nand_reset(struct nand_chip *chip, int chipnr);
 

diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index 06df1e0..c4beb70 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h

@@ -20,6 +20,12 @@
  *
  * For each partition, these fields are available:
  * name: string that will be used to label the partition's MTD device.
+ * types: some partitions can be containers using specific format to describe
+ *	embedded subpartitions / volumes. E.g. many home routers use "firmware"
+ *	partition that contains at least kernel and rootfs. In such case an
+ *	extra parser is needed that will detect these dynamic partitions and
+ *	report them to the MTD subsystem. If set this property stores an array
+ *	of parser names to use when looking for subpartitions.
  * size: the partition size; if defined as MTDPART_SIZ_FULL, the partition
  * 	will extend to the end of the master MTD device.
  * offset: absolute starting position within the master MTD device; if
@@ -38,6 +44,7 @@
 
 struct mtd_partition {
 	const char *name;		/* identifier string */
+	const char *const *types;	/* names of parsers to use if any */
 	uint64_t size;			/* partition size */
 	uint64_t offset;		/* offset within the master MTD space */
 	uint32_t mask_flags;		/* master MTD flags to mask out for this partition */

diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index f2a7180..55faa2f 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h

@@ -73,6 +73,15 @@
 #define SPINOR_OP_BE_32K_4B	0x5c	/* Erase 32KiB block */
 #define SPINOR_OP_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
 
+/* Double Transfer Rate opcodes - defined in JEDEC JESD216B. */
+#define SPINOR_OP_READ_1_1_1_DTR	0x0d
+#define SPINOR_OP_READ_1_2_2_DTR	0xbd
+#define SPINOR_OP_READ_1_4_4_DTR	0xed
+
+#define SPINOR_OP_READ_1_1_1_DTR_4B	0x0e
+#define SPINOR_OP_READ_1_2_2_DTR_4B	0xbe
+#define SPINOR_OP_READ_1_4_4_DTR_4B	0xee
+
 /* Used for SST flashes only. */
 #define SPINOR_OP_BP		0x02	/* Byte program */
 #define SPINOR_OP_WRDI		0x04	/* Write disable */
@@ -119,13 +128,81 @@
 /* Configuration Register bits. */
 #define CR_QUAD_EN_SPAN		BIT(1)	/* Spansion Quad I/O */
 
-enum read_mode {
-	SPI_NOR_NORMAL = 0,
-	SPI_NOR_FAST,
-	SPI_NOR_DUAL,
-	SPI_NOR_QUAD,
+/* Supported SPI protocols */
+#define SNOR_PROTO_INST_MASK	GENMASK(23, 16)
+#define SNOR_PROTO_INST_SHIFT	16
+#define SNOR_PROTO_INST(_nbits)	\
+	((((unsigned long)(_nbits)) << SNOR_PROTO_INST_SHIFT) & \
+	 SNOR_PROTO_INST_MASK)
+
+#define SNOR_PROTO_ADDR_MASK	GENMASK(15, 8)
+#define SNOR_PROTO_ADDR_SHIFT	8
+#define SNOR_PROTO_ADDR(_nbits)	\
+	((((unsigned long)(_nbits)) << SNOR_PROTO_ADDR_SHIFT) & \
+	 SNOR_PROTO_ADDR_MASK)
+
+#define SNOR_PROTO_DATA_MASK	GENMASK(7, 0)
+#define SNOR_PROTO_DATA_SHIFT	0
+#define SNOR_PROTO_DATA(_nbits)	\
+	((((unsigned long)(_nbits)) << SNOR_PROTO_DATA_SHIFT) & \
+	 SNOR_PROTO_DATA_MASK)
+
+#define SNOR_PROTO_IS_DTR	BIT(24)	/* Double Transfer Rate */
+
+#define SNOR_PROTO_STR(_inst_nbits, _addr_nbits, _data_nbits)	\
+	(SNOR_PROTO_INST(_inst_nbits) |				\
+	 SNOR_PROTO_ADDR(_addr_nbits) |				\
+	 SNOR_PROTO_DATA(_data_nbits))
+#define SNOR_PROTO_DTR(_inst_nbits, _addr_nbits, _data_nbits)	\
+	(SNOR_PROTO_IS_DTR |					\
+	 SNOR_PROTO_STR(_inst_nbits, _addr_nbits, _data_nbits))
+
+enum spi_nor_protocol {
+	SNOR_PROTO_1_1_1 = SNOR_PROTO_STR(1, 1, 1),
+	SNOR_PROTO_1_1_2 = SNOR_PROTO_STR(1, 1, 2),
+	SNOR_PROTO_1_1_4 = SNOR_PROTO_STR(1, 1, 4),
+	SNOR_PROTO_1_1_8 = SNOR_PROTO_STR(1, 1, 8),
+	SNOR_PROTO_1_2_2 = SNOR_PROTO_STR(1, 2, 2),
+	SNOR_PROTO_1_4_4 = SNOR_PROTO_STR(1, 4, 4),
+	SNOR_PROTO_1_8_8 = SNOR_PROTO_STR(1, 8, 8),
+	SNOR_PROTO_2_2_2 = SNOR_PROTO_STR(2, 2, 2),
+	SNOR_PROTO_4_4_4 = SNOR_PROTO_STR(4, 4, 4),
+	SNOR_PROTO_8_8_8 = SNOR_PROTO_STR(8, 8, 8),
+
+	SNOR_PROTO_1_1_1_DTR = SNOR_PROTO_DTR(1, 1, 1),
+	SNOR_PROTO_1_2_2_DTR = SNOR_PROTO_DTR(1, 2, 2),
+	SNOR_PROTO_1_4_4_DTR = SNOR_PROTO_DTR(1, 4, 4),
+	SNOR_PROTO_1_8_8_DTR = SNOR_PROTO_DTR(1, 8, 8),
 };
 
+static inline bool spi_nor_protocol_is_dtr(enum spi_nor_protocol proto)
+{
+	return !!(proto & SNOR_PROTO_IS_DTR);
+}
+
+static inline u8 spi_nor_get_protocol_inst_nbits(enum spi_nor_protocol proto)
+{
+	return ((unsigned long)(proto & SNOR_PROTO_INST_MASK)) >>
+		SNOR_PROTO_INST_SHIFT;
+}
+
+static inline u8 spi_nor_get_protocol_addr_nbits(enum spi_nor_protocol proto)
+{
+	return ((unsigned long)(proto & SNOR_PROTO_ADDR_MASK)) >>
+		SNOR_PROTO_ADDR_SHIFT;
+}
+
+static inline u8 spi_nor_get_protocol_data_nbits(enum spi_nor_protocol proto)
+{
+	return ((unsigned long)(proto & SNOR_PROTO_DATA_MASK)) >>
+		SNOR_PROTO_DATA_SHIFT;
+}
+
+static inline u8 spi_nor_get_protocol_width(enum spi_nor_protocol proto)
+{
+	return spi_nor_get_protocol_data_nbits(proto);
+}
+
 #define SPI_NOR_MAX_CMD_SIZE	8
 enum spi_nor_ops {
 	SPI_NOR_OPS_READ = 0,
@@ -154,9 +231,11 @@ enum spi_nor_option_flags {
  * @read_opcode:	the read opcode
  * @read_dummy:		the dummy needed by the read operation
  * @program_opcode:	the program opcode
- * @flash_read:		the mode of the read
  * @sst_write_second:	used by the SST write operation
  * @flags:		flag options for the current SPI-NOR (SNOR_F_*)
+ * @read_proto:		the SPI protocol for read operations
+ * @write_proto:	the SPI protocol for write operations
+ * @reg_proto:		the SPI protocol for read_reg/write_reg/erase operations
  * @cmd_buf:		used by the write_reg
  * @prepare:		[OPTIONAL] do some preparations for the
  *			read/write/erase/lock/unlock operations
@@ -185,7 +264,9 @@ struct spi_nor {
 	u8			read_opcode;
 	u8			read_dummy;
 	u8			program_opcode;
-	enum read_mode		flash_read;
+	enum spi_nor_protocol	read_proto;
+	enum spi_nor_protocol	write_proto;
+	enum spi_nor_protocol	reg_proto;
 	bool			sst_write_second;
 	u32			flags;
 	u8			cmd_buf[SPI_NOR_MAX_CMD_SIZE];
@@ -220,10 +301,71 @@ static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor)
 }
 
 /**
+ * struct spi_nor_hwcaps - Structure for describing the hardware capabilities
+ * supported by the SPI controller (bus master).
+ * @mask:		the bitmask listing all the supported hw capabilities
+ */
+struct spi_nor_hwcaps {
+	u32	mask;
+};
+
+/*
+ * (Fast) Read capabilities.
+ * MUST be ordered by priority: the higher bit position, the higher priority.
+ * For performance reasons, Octo SPI protocols are preferred first, then
+ * Quad SPI protocols, then Dual SPI protocols, then Fast Read and lastly
+ * (Slow) Read.
+ */
+#define SNOR_HWCAPS_READ_MASK		GENMASK(14, 0)
+#define SNOR_HWCAPS_READ		BIT(0)
+#define SNOR_HWCAPS_READ_FAST		BIT(1)
+#define SNOR_HWCAPS_READ_1_1_1_DTR	BIT(2)
+
+#define SNOR_HWCAPS_READ_DUAL		GENMASK(6, 3)
+#define SNOR_HWCAPS_READ_1_1_2		BIT(3)
+#define SNOR_HWCAPS_READ_1_2_2		BIT(4)
+#define SNOR_HWCAPS_READ_2_2_2		BIT(5)
+#define SNOR_HWCAPS_READ_1_2_2_DTR	BIT(6)
+
+#define SNOR_HWCAPS_READ_QUAD		GENMASK(10, 7)
+#define SNOR_HWCAPS_READ_1_1_4		BIT(7)
+#define SNOR_HWCAPS_READ_1_4_4		BIT(8)
+#define SNOR_HWCAPS_READ_4_4_4		BIT(9)
+#define SNOR_HWCAPS_READ_1_4_4_DTR	BIT(10)
+
+#define SNOR_HWCPAS_READ_OCTO		GENMASK(14, 11)
+#define SNOR_HWCAPS_READ_1_1_8		BIT(11)
+#define SNOR_HWCAPS_READ_1_8_8		BIT(12)
+#define SNOR_HWCAPS_READ_8_8_8		BIT(13)
+#define SNOR_HWCAPS_READ_1_8_8_DTR	BIT(14)
+
+/*
+ * Page Program capabilities.
+ * MUST be ordered by priority: the higher bit position, the higher priority.
+ * Like (Fast) Read capabilities, Octo/Quad SPI protocols are preferred to the
+ * legacy SPI 1-1-1 protocol.
+ * Note that Dual Page Programs are not supported because there is no existing
+ * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory
+ * implements such commands.
+ */
+#define SNOR_HWCAPS_PP_MASK	GENMASK(22, 16)
+#define SNOR_HWCAPS_PP		BIT(16)
+
+#define SNOR_HWCAPS_PP_QUAD	GENMASK(19, 17)
+#define SNOR_HWCAPS_PP_1_1_4	BIT(17)
+#define SNOR_HWCAPS_PP_1_4_4	BIT(18)
+#define SNOR_HWCAPS_PP_4_4_4	BIT(19)
+
+#define SNOR_HWCAPS_PP_OCTO	GENMASK(22, 20)
+#define SNOR_HWCAPS_PP_1_1_8	BIT(20)
+#define SNOR_HWCAPS_PP_1_8_8	BIT(21)
+#define SNOR_HWCAPS_PP_8_8_8	BIT(22)
+
+/**
  * spi_nor_scan() - scan the SPI NOR
  * @nor:	the spi_nor structure
  * @name:	the chip type name
- * @mode:	the read mode supported by the driver
+ * @hwcaps:	the hardware capabilities supported by the controller driver
  *
  * The drivers can use this fuction to scan the SPI NOR.
  * In the scanning, it will try to get all the necessary information to
@@ -233,6 +375,7 @@ static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor)
  *
  * Return: 0 for success, others for failure.
  */
-int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode);
+int spi_nor_scan(struct spi_nor *nor, const char *name,
+		 const struct spi_nor_hwcaps *hwcaps);
 
 #endif

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 1b1ca04..47239c3 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h

@@ -479,6 +479,7 @@ enum {
 	NFSPROC4_CLNT_ACCESS,
 	NFSPROC4_CLNT_GETATTR,
 	NFSPROC4_CLNT_LOOKUP,
+	NFSPROC4_CLNT_LOOKUPP,
 	NFSPROC4_CLNT_LOOKUP_ROOT,
 	NFSPROC4_CLNT_REMOVE,
 	NFSPROC4_CLNT_RENAME,

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index bb0eb2c..e52cc55 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h

@@ -332,6 +332,7 @@ extern void nfs_zap_caches(struct inode *);
 extern void nfs_invalidate_atime(struct inode *);
 extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
 				struct nfs_fattr *, struct nfs4_label *);
+struct inode *nfs_ilookup(struct super_block *sb, struct nfs_fattr *, struct nfs_fh *);
 extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
 extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e418a10..74c4466 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h

@@ -42,6 +42,7 @@ struct nfs_client {
 #define NFS_CS_MIGRATION	2		/* - transparent state migr */
 #define NFS_CS_INFINITE_SLOTS	3		/* - don't limit TCP slots */
 #define NFS_CS_NO_RETRANS_TIMEOUT	4	/* - Disable retransmit timeouts */
+#define NFS_CS_TSM_POSSIBLE	5		/* - Maybe state migration */
 	struct sockaddr_storage	cl_addr;	/* server identifier */
 	size_t			cl_addrlen;
 	char *			cl_hostname;	/* hostname of server */
@@ -210,6 +211,7 @@ struct nfs_server {
 	unsigned long		mig_status;
 #define NFS_MIG_IN_TRANSITION		(1)
 #define NFS_MIG_FAILED			(2)
+#define NFS_MIG_TSM_POSSIBLE		(3)
 
 	void (*destroy)(struct nfs_server *);
 

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 247cc3d..d67b67a 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h

@@ -33,6 +33,8 @@ enum {
 	PG_UPTODATE,		/* page group sync bit in read path */
 	PG_WB_END,		/* page group sync bit in write path */
 	PG_REMOVE,		/* page group sync bit in write path */
+	PG_CONTENDED1,		/* Is someone waiting for a lock? */
+	PG_CONTENDED2,		/* Is someone waiting for a lock? */
 };
 
 struct nfs_inode;
@@ -93,8 +95,8 @@ struct nfs_pageio_descriptor {
 	const struct rpc_call_ops *pg_rpc_callops;
 	const struct nfs_pgio_completion_ops *pg_completion_ops;
 	struct pnfs_layout_segment *pg_lseg;
+	struct nfs_io_completion *pg_io_completion;
 	struct nfs_direct_req	*pg_dreq;
-	void			*pg_layout_private;
 	unsigned int		pg_bsize;	/* default bsize for mirrors */
 
 	u32			pg_mirror_count;

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index b28c834..ca3bcc4 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h

@@ -878,7 +878,7 @@ struct nfs3_readdirargs {
 	struct nfs_fh *		fh;
 	__u64			cookie;
 	__be32			verf[2];
-	int			plus;
+	bool			plus;
 	unsigned int            count;
 	struct page **		pages;
 };
@@ -909,7 +909,7 @@ struct nfs3_linkres {
 struct nfs3_readdirres {
 	struct nfs_fattr *	dir_attr;
 	__be32 *		verf;
-	int			plus;
+	bool			plus;
 };
 
 struct nfs3_getaclres {
@@ -1012,7 +1012,6 @@ struct nfs4_link_res {
 	struct nfs_fattr *		dir_attr;
 };
 
-
 struct nfs4_lookup_arg {
 	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		dir_fh;
@@ -1028,6 +1027,20 @@ struct nfs4_lookup_res {
 	struct nfs4_label		*label;
 };
 
+struct nfs4_lookupp_arg {
+	struct nfs4_sequence_args	seq_args;
+	const struct nfs_fh		*fh;
+	const u32			*bitmask;
+};
+
+struct nfs4_lookupp_res {
+	struct nfs4_sequence_res	seq_res;
+	const struct nfs_server		*server;
+	struct nfs_fattr		*fattr;
+	struct nfs_fh			*fh;
+	struct nfs4_label		*label;
+};
+
 struct nfs4_lookup_root_arg {
 	struct nfs4_sequence_args	seq_args;
 	const u32 *			bitmask;
@@ -1053,7 +1066,7 @@ struct nfs4_readdir_arg {
 	struct page **			pages;	/* zero-copy data */
 	unsigned int			pgbase;	/* zero-copy data */
 	const u32 *			bitmask;
-	int				plus;
+	bool				plus;
 };
 
 struct nfs4_readdir_res {
@@ -1422,6 +1435,7 @@ enum {
 	NFS_IOHDR_STAT,
 };
 
+struct nfs_io_completion;
 struct nfs_pgio_header {
 	struct inode		*inode;
 	struct rpc_cred		*cred;
@@ -1435,8 +1449,8 @@ struct nfs_pgio_header {
 	void (*release) (struct nfs_pgio_header *hdr);
 	const struct nfs_pgio_completion_ops *completion_ops;
 	const struct nfs_rw_ops	*rw_ops;
+	struct nfs_io_completion *io_completion;
 	struct nfs_direct_req	*dreq;
-	void			*layout_private;
 	spinlock_t		lock;
 	/* fields protected by lock */
 	int			pnfs_error;
@@ -1533,6 +1547,7 @@ struct nfs_renamedata {
 	struct nfs_fattr	new_fattr;
 	void (*complete)(struct rpc_task *, struct nfs_renamedata *);
 	long timeout;
+	bool cancelled;
 };
 
 struct nfs_access_entry;
@@ -1567,6 +1582,8 @@ struct nfs_rpc_ops {
 	int	(*lookup)  (struct inode *, const struct qstr *,
 			    struct nfs_fh *, struct nfs_fattr *,
 			    struct nfs4_label *);
+	int	(*lookupp) (struct inode *, struct nfs_fh *,
+			    struct nfs_fattr *, struct nfs4_label *);
 	int	(*access)  (struct inode *, struct nfs_access_entry *);
 	int	(*readlink)(struct inode *, struct page *, unsigned int,
 			    unsigned int);
@@ -1585,7 +1602,7 @@ struct nfs_rpc_ops {
 	int	(*mkdir)   (struct inode *, struct dentry *, struct iattr *);
 	int	(*rmdir)   (struct inode *, const struct qstr *);
 	int	(*readdir) (struct dentry *, struct rpc_cred *,
-			    u64, struct page **, unsigned int, int);
+			    u64, struct page **, unsigned int, bool);
 	int	(*mknod)   (struct inode *, struct dentry *, struct iattr *,
 			    dev_t);
 	int	(*statfs)  (struct nfs_server *, struct nfs_fh *,
@@ -1595,7 +1612,7 @@ struct nfs_rpc_ops {
 	int	(*pathconf) (struct nfs_server *, struct nfs_fh *,
 			     struct nfs_pathconf *);
 	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
-	int	(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
+	int	(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, bool);
 	int	(*pgio_rpc_prepare)(struct rpc_task *,
 				    struct nfs_pgio_header *);
 	void	(*read_setup)(struct nfs_pgio_header *, struct rpc_message *);

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index aa3cd08..8aa01fd 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h

@@ -6,18 +6,26 @@
 
 #include <linux/sched.h>
 #include <asm/irq.h>
+#if defined(CONFIG_HAVE_NMI_WATCHDOG)
+#include <asm/nmi.h>
+#endif
 
 #ifdef CONFIG_LOCKUP_DETECTOR
+void lockup_detector_init(void);
+#else
+static inline void lockup_detector_init(void)
+{
+}
+#endif
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 extern void touch_softlockup_watchdog_sched(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
-extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
-				  void __user *buffer,
-				  size_t *lenp, loff_t *ppos);
 extern unsigned int  softlockup_panic;
-extern unsigned int  hardlockup_panic;
-void lockup_detector_init(void);
+extern int soft_watchdog_enabled;
+extern atomic_t watchdog_park_in_progress;
 #else
 static inline void touch_softlockup_watchdog_sched(void)
 {
@@ -31,9 +39,6 @@ static inline void touch_softlockup_watchdog_sync(void)
 static inline void touch_all_softlockup_watchdogs(void)
 {
 }
-static inline void lockup_detector_init(void)
-{
-}
 #endif
 
 #ifdef CONFIG_DETECT_HUNG_TASK
@@ -61,6 +66,21 @@ static inline void reset_hung_task_detector(void)
 #define NMI_WATCHDOG_ENABLED      (1 << NMI_WATCHDOG_ENABLED_BIT)
 #define SOFT_WATCHDOG_ENABLED     (1 << SOFT_WATCHDOG_ENABLED_BIT)
 
+#if defined(CONFIG_HARDLOCKUP_DETECTOR)
+extern void hardlockup_detector_disable(void);
+extern unsigned int hardlockup_panic;
+#else
+static inline void hardlockup_detector_disable(void) {}
+#endif
+
+#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
+extern void arch_touch_nmi_watchdog(void);
+#else
+#if !defined(CONFIG_HAVE_NMI_WATCHDOG)
+static inline void arch_touch_nmi_watchdog(void) {}
+#endif
+#endif
+
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
  * 
@@ -68,21 +88,11 @@ static inline void reset_hung_task_detector(void)
  * may be used to reset the timeout - for code which intentionally
  * disables interrupts for a long time. This call is stateless.
  */
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
-#include <asm/nmi.h>
-extern void touch_nmi_watchdog(void);
-#else
 static inline void touch_nmi_watchdog(void)
 {
+	arch_touch_nmi_watchdog();
 	touch_softlockup_watchdog();
 }
-#endif
-
-#if defined(CONFIG_HARDLOCKUP_DETECTOR)
-extern void hardlockup_detector_disable(void);
-#else
-static inline void hardlockup_detector_disable(void) {}
-#endif
 
 /*
  * Create trigger_all_cpu_backtrace() out of the arch-provided
@@ -139,15 +149,18 @@ static inline bool trigger_single_cpu_backtrace(int cpu)
 }
 #endif
 
-#ifdef CONFIG_LOCKUP_DETECTOR
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
 u64 hw_nmi_get_sample_period(int watchdog_thresh);
+#endif
+
+#ifdef CONFIG_LOCKUP_DETECTOR
 extern int nmi_watchdog_enabled;
-extern int soft_watchdog_enabled;
 extern int watchdog_user_enabled;
 extern int watchdog_thresh;
 extern unsigned long watchdog_enabled;
+extern struct cpumask watchdog_cpumask;
 extern unsigned long *watchdog_cpumask_bits;
-extern atomic_t watchdog_park_in_progress;
+extern int __read_mostly watchdog_suspended;
 #ifdef CONFIG_SMP
 extern int sysctl_softlockup_all_cpu_backtrace;
 extern int sysctl_hardlockup_all_cpu_backtrace;

diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index de87cea..609e232 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h

@@ -5,6 +5,7 @@
  *   GPL LICENSE SUMMARY
  *
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of version 2 of the GNU General Public License as
@@ -18,6 +19,7 @@
  *   BSD LICENSE
  *
  *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *   Copyright (C) 2016 T-Platforms. All Rights Reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -106,6 +108,7 @@ static inline char *ntb_topo_string(enum ntb_topo topo)
  * @NTB_SPEED_GEN1:	Link is trained to gen1 speed.
  * @NTB_SPEED_GEN2:	Link is trained to gen2 speed.
  * @NTB_SPEED_GEN3:	Link is trained to gen3 speed.
+ * @NTB_SPEED_GEN4:	Link is trained to gen4 speed.
  */
 enum ntb_speed {
 	NTB_SPEED_AUTO = -1,
@@ -113,6 +116,7 @@ enum ntb_speed {
 	NTB_SPEED_GEN1 = 1,
 	NTB_SPEED_GEN2 = 2,
 	NTB_SPEED_GEN3 = 3,
+	NTB_SPEED_GEN4 = 4
 };
 
 /**
@@ -140,6 +144,20 @@ enum ntb_width {
 };
 
 /**
+ * enum ntb_default_port - NTB default port number
+ * @NTB_PORT_PRI_USD:	Default port of the NTB_TOPO_PRI/NTB_TOPO_B2B_USD
+ *			topologies
+ * @NTB_PORT_SEC_DSD:	Default port of the NTB_TOPO_SEC/NTB_TOPO_B2B_DSD
+ *			topologies
+ */
+enum ntb_default_port {
+	NTB_PORT_PRI_USD,
+	NTB_PORT_SEC_DSD
+};
+#define NTB_DEF_PEER_CNT	(1)
+#define NTB_DEF_PEER_IDX	(0)
+
+/**
  * struct ntb_client_ops - ntb client operations
  * @probe:		Notify client of a new device.
  * @remove:		Notify client to remove a device.
@@ -162,10 +180,12 @@ static inline int ntb_client_ops_is_valid(const struct ntb_client_ops *ops)
  * struct ntb_ctx_ops - ntb driver context operations
  * @link_event:		See ntb_link_event().
  * @db_event:		See ntb_db_event().
+ * @msg_event:		See ntb_msg_event().
  */
 struct ntb_ctx_ops {
 	void (*link_event)(void *ctx);
 	void (*db_event)(void *ctx, int db_vector);
+	void (*msg_event)(void *ctx);
 };
 
 static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
@@ -174,18 +194,27 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
 	return
 		/* ops->link_event		&& */
 		/* ops->db_event		&& */
+		/* ops->msg_event		&& */
 		1;
 }
 
 /**
  * struct ntb_ctx_ops - ntb device operations
- * @mw_count:		See ntb_mw_count().
- * @mw_get_range:	See ntb_mw_get_range().
- * @mw_set_trans:	See ntb_mw_set_trans().
- * @mw_clear_trans:	See ntb_mw_clear_trans().
+ * @port_number:	See ntb_port_number().
+ * @peer_port_count:	See ntb_peer_port_count().
+ * @peer_port_number:	See ntb_peer_port_number().
+ * @peer_port_idx:	See ntb_peer_port_idx().
  * @link_is_up:		See ntb_link_is_up().
  * @link_enable:	See ntb_link_enable().
  * @link_disable:	See ntb_link_disable().
+ * @mw_count:		See ntb_mw_count().
+ * @mw_get_align:	See ntb_mw_get_align().
+ * @mw_set_trans:	See ntb_mw_set_trans().
+ * @mw_clear_trans:	See ntb_mw_clear_trans().
+ * @peer_mw_count:	See ntb_peer_mw_count().
+ * @peer_mw_get_addr:	See ntb_peer_mw_get_addr().
+ * @peer_mw_set_trans:	See ntb_peer_mw_set_trans().
+ * @peer_mw_clear_trans:See ntb_peer_mw_clear_trans().
  * @db_is_unsafe:	See ntb_db_is_unsafe().
  * @db_valid_mask:	See ntb_db_valid_mask().
  * @db_vector_count:	See ntb_db_vector_count().
@@ -210,22 +239,43 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
  * @peer_spad_addr:	See ntb_peer_spad_addr().
  * @peer_spad_read:	See ntb_peer_spad_read().
  * @peer_spad_write:	See ntb_peer_spad_write().
+ * @msg_count:		See ntb_msg_count().
+ * @msg_inbits:		See ntb_msg_inbits().
+ * @msg_outbits:	See ntb_msg_outbits().
+ * @msg_read_sts:	See ntb_msg_read_sts().
+ * @msg_clear_sts:	See ntb_msg_clear_sts().
+ * @msg_set_mask:	See ntb_msg_set_mask().
+ * @msg_clear_mask:	See ntb_msg_clear_mask().
+ * @msg_read:		See ntb_msg_read().
+ * @msg_write:		See ntb_msg_write().
  */
 struct ntb_dev_ops {
-	int (*mw_count)(struct ntb_dev *ntb);
-	int (*mw_get_range)(struct ntb_dev *ntb, int idx,
-			    phys_addr_t *base, resource_size_t *size,
-			resource_size_t *align, resource_size_t *align_size);
-	int (*mw_set_trans)(struct ntb_dev *ntb, int idx,
-			    dma_addr_t addr, resource_size_t size);
-	int (*mw_clear_trans)(struct ntb_dev *ntb, int idx);
+	int (*port_number)(struct ntb_dev *ntb);
+	int (*peer_port_count)(struct ntb_dev *ntb);
+	int (*peer_port_number)(struct ntb_dev *ntb, int pidx);
+	int (*peer_port_idx)(struct ntb_dev *ntb, int port);
 
-	int (*link_is_up)(struct ntb_dev *ntb,
+	u64 (*link_is_up)(struct ntb_dev *ntb,
 			  enum ntb_speed *speed, enum ntb_width *width);
 	int (*link_enable)(struct ntb_dev *ntb,
 			   enum ntb_speed max_speed, enum ntb_width max_width);
 	int (*link_disable)(struct ntb_dev *ntb);
 
+	int (*mw_count)(struct ntb_dev *ntb, int pidx);
+	int (*mw_get_align)(struct ntb_dev *ntb, int pidx, int widx,
+			    resource_size_t *addr_align,
+			    resource_size_t *size_align,
+			    resource_size_t *size_max);
+	int (*mw_set_trans)(struct ntb_dev *ntb, int pidx, int widx,
+			    dma_addr_t addr, resource_size_t size);
+	int (*mw_clear_trans)(struct ntb_dev *ntb, int pidx, int widx);
+	int (*peer_mw_count)(struct ntb_dev *ntb);
+	int (*peer_mw_get_addr)(struct ntb_dev *ntb, int widx,
+				phys_addr_t *base, resource_size_t *size);
+	int (*peer_mw_set_trans)(struct ntb_dev *ntb, int pidx, int widx,
+				 u64 addr, resource_size_t size);
+	int (*peer_mw_clear_trans)(struct ntb_dev *ntb, int pidx, int widx);
+
 	int (*db_is_unsafe)(struct ntb_dev *ntb);
 	u64 (*db_valid_mask)(struct ntb_dev *ntb);
 	int (*db_vector_count)(struct ntb_dev *ntb);
@@ -252,32 +302,55 @@ struct ntb_dev_ops {
 	int (*spad_is_unsafe)(struct ntb_dev *ntb);
 	int (*spad_count)(struct ntb_dev *ntb);
 
-	u32 (*spad_read)(struct ntb_dev *ntb, int idx);
-	int (*spad_write)(struct ntb_dev *ntb, int idx, u32 val);
+	u32 (*spad_read)(struct ntb_dev *ntb, int sidx);
+	int (*spad_write)(struct ntb_dev *ntb, int sidx, u32 val);
 
-	int (*peer_spad_addr)(struct ntb_dev *ntb, int idx,
+	int (*peer_spad_addr)(struct ntb_dev *ntb, int pidx, int sidx,
 			      phys_addr_t *spad_addr);
-	u32 (*peer_spad_read)(struct ntb_dev *ntb, int idx);
-	int (*peer_spad_write)(struct ntb_dev *ntb, int idx, u32 val);
+	u32 (*peer_spad_read)(struct ntb_dev *ntb, int pidx, int sidx);
+	int (*peer_spad_write)(struct ntb_dev *ntb, int pidx, int sidx,
+			       u32 val);
+
+	int (*msg_count)(struct ntb_dev *ntb);
+	u64 (*msg_inbits)(struct ntb_dev *ntb);
+	u64 (*msg_outbits)(struct ntb_dev *ntb);
+	u64 (*msg_read_sts)(struct ntb_dev *ntb);
+	int (*msg_clear_sts)(struct ntb_dev *ntb, u64 sts_bits);
+	int (*msg_set_mask)(struct ntb_dev *ntb, u64 mask_bits);
+	int (*msg_clear_mask)(struct ntb_dev *ntb, u64 mask_bits);
+	int (*msg_read)(struct ntb_dev *ntb, int midx, int *pidx, u32 *msg);
+	int (*msg_write)(struct ntb_dev *ntb, int midx, int pidx, u32 msg);
 };
 
 static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 {
 	/* commented callbacks are not required: */
 	return
-		ops->mw_count				&&
-		ops->mw_get_range			&&
-		ops->mw_set_trans			&&
-		/* ops->mw_clear_trans			&& */
+		/* Port operations are required for multiport devices */
+		!ops->peer_port_count == !ops->port_number	&&
+		!ops->peer_port_number == !ops->port_number	&&
+		!ops->peer_port_idx == !ops->port_number	&&
+
+		/* Link operations are required */
 		ops->link_is_up				&&
 		ops->link_enable			&&
 		ops->link_disable			&&
+
+		/* One or both MW interfaces should be developed */
+		ops->mw_count				&&
+		ops->mw_get_align			&&
+		(ops->mw_set_trans			||
+		 ops->peer_mw_set_trans)		&&
+		/* ops->mw_clear_trans			&& */
+		ops->peer_mw_count			&&
+		ops->peer_mw_get_addr			&&
+		/* ops->peer_mw_clear_trans		&& */
+
+		/* Doorbell operations are mostly required */
 		/* ops->db_is_unsafe			&& */
 		ops->db_valid_mask			&&
-
 		/* both set, or both unset */
-		(!ops->db_vector_count == !ops->db_vector_mask) &&
-
+		(!ops->db_vector_count == !ops->db_vector_mask)	&&
 		ops->db_read				&&
 		/* ops->db_set				&& */
 		ops->db_clear				&&
@@ -291,13 +364,24 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops)
 		/* ops->peer_db_read_mask		&& */
 		/* ops->peer_db_set_mask		&& */
 		/* ops->peer_db_clear_mask		&& */
-		/* ops->spad_is_unsafe			&& */
-		ops->spad_count				&&
-		ops->spad_read				&&
-		ops->spad_write				&&
-		/* ops->peer_spad_addr			&& */
-		/* ops->peer_spad_read			&& */
-		ops->peer_spad_write			&&
+
+		/* Scratchpads interface is optional */
+		/* !ops->spad_is_unsafe == !ops->spad_count	&& */
+		!ops->spad_read == !ops->spad_count		&&
+		!ops->spad_write == !ops->spad_count		&&
+		/* !ops->peer_spad_addr == !ops->spad_count	&& */
+		/* !ops->peer_spad_read == !ops->spad_count	&& */
+		!ops->peer_spad_write == !ops->spad_count	&&
+
+		/* Messaging interface is optional */
+		!ops->msg_inbits == !ops->msg_count		&&
+		!ops->msg_outbits == !ops->msg_count		&&
+		!ops->msg_read_sts == !ops->msg_count		&&
+		!ops->msg_clear_sts == !ops->msg_count		&&
+		/* !ops->msg_set_mask == !ops->msg_count	&& */
+		/* !ops->msg_clear_mask == !ops->msg_count	&& */
+		!ops->msg_read == !ops->msg_count		&&
+		!ops->msg_write == !ops->msg_count		&&
 		1;
 }
 
@@ -310,13 +394,12 @@ struct ntb_client {
 	struct device_driver		drv;
 	const struct ntb_client_ops	ops;
 };
-
 #define drv_ntb_client(__drv) container_of((__drv), struct ntb_client, drv)
 
 /**
  * struct ntb_device - ntb device
  * @dev:		Linux device object.
- * @pdev:		Pci device entry of the ntb.
+ * @pdev:		PCI device entry of the ntb.
  * @topo:		Detected topology of the ntb.
  * @ops:		See &ntb_dev_ops.
  * @ctx:		See &ntb_ctx_ops.
@@ -337,7 +420,6 @@ struct ntb_dev {
 	/* block unregister until device is fully released */
 	struct completion		released;
 };
-
 #define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev)
 
 /**
@@ -434,86 +516,152 @@ void ntb_link_event(struct ntb_dev *ntb);
  * multiple interrupt vectors for doorbells, the vector number indicates which
  * vector received the interrupt.  The vector number is relative to the first
  * vector used for doorbells, starting at zero, and must be less than
- ** ntb_db_vector_count().  The driver may call ntb_db_read() to check which
+ * ntb_db_vector_count().  The driver may call ntb_db_read() to check which
  * doorbell bits need service, and ntb_db_vector_mask() to determine which of
  * those bits are associated with the vector number.
  */
 void ntb_db_event(struct ntb_dev *ntb, int vector);
 
 /**
- * ntb_mw_count() - get the number of memory windows
+ * ntb_msg_event() - notify driver context of a message event
  * @ntb:	NTB device context.
  *
- * Hardware and topology may support a different number of memory windows.
- *
- * Return: the number of memory windows.
+ * Notify the driver context of a message event.  If hardware supports
+ * message registers, this event indicates that a new message arrived in
+ * some incoming message register or the last sent message couldn't be
+ * delivered.  The events can be masked/unmasked by the methods
+ * ntb_msg_set_mask() and ntb_msg_clear_mask().
  */
-static inline int ntb_mw_count(struct ntb_dev *ntb)
+void ntb_msg_event(struct ntb_dev *ntb);
+
+/**
+ * ntb_default_port_number() - get the default local port number
+ * @ntb:	NTB device context.
+ *
+ * If hardware driver doesn't specify port_number() callback method, the NTB
+ * is considered with just two ports. So this method returns default local
+ * port number in compliance with topology.
+ *
+ * NOTE Don't call this method directly. The ntb_port_number() function should
+ * be used instead.
+ *
+ * Return: the default local port number
+ */
+int ntb_default_port_number(struct ntb_dev *ntb);
+
+/**
+ * ntb_default_peer_port_count() - get the default number of peer device ports
+ * @ntb:	NTB device context.
+ *
+ * By default hardware driver supports just one peer device.
+ *
+ * NOTE Don't call this method directly. The ntb_peer_port_count() function
+ * should be used instead.
+ *
+ * Return: the default number of peer ports
+ */
+int ntb_default_peer_port_count(struct ntb_dev *ntb);
+
+/**
+ * ntb_default_peer_port_number() - get the default peer port by given index
+ * @ntb:	NTB device context.
+ * @pidx:	Peer port index (should not differ from zero).
+ *
+ * By default hardware driver supports just one peer device, so this method
+ * shall return the corresponding value from enum ntb_default_port.
+ *
+ * NOTE Don't call this method directly. The ntb_peer_port_number() function
+ * should be used instead.
+ *
+ * Return: the peer device port or negative value indicating an error
+ */
+int ntb_default_peer_port_number(struct ntb_dev *ntb, int pidx);
+
+/**
+ * ntb_default_peer_port_idx() - get the default peer device port index by
+ *				 given port number
+ * @ntb:	NTB device context.
+ * @port:	Peer port number (should be one of enum ntb_default_port).
+ *
+ * By default hardware driver supports just one peer device, so while
+ * specified port-argument indicates peer port from enum ntb_default_port,
+ * the return value shall be zero.
+ *
+ * NOTE Don't call this method directly. The ntb_peer_port_idx() function
+ * should be used instead.
+ *
+ * Return: the peer port index or negative value indicating an error
+ */
+int ntb_default_peer_port_idx(struct ntb_dev *ntb, int port);
+
+/**
+ * ntb_port_number() - get the local port number
+ * @ntb:	NTB device context.
+ *
+ * Hardware must support at least simple two-ports ntb connection
+ *
+ * Return: the local port number
+ */
+static inline int ntb_port_number(struct ntb_dev *ntb)
 {
-	return ntb->ops->mw_count(ntb);
+	if (!ntb->ops->port_number)
+		return ntb_default_port_number(ntb);
+
+	return ntb->ops->port_number(ntb);
 }
 
 /**
- * ntb_mw_get_range() - get the range of a memory window
+ * ntb_peer_port_count() - get the number of peer device ports
  * @ntb:	NTB device context.
- * @idx:	Memory window number.
- * @base:	OUT - the base address for mapping the memory window
- * @size:	OUT - the size for mapping the memory window
- * @align:	OUT - the base alignment for translating the memory window
- * @align_size:	OUT - the size alignment for translating the memory window
  *
- * Get the range of a memory window.  NULL may be given for any output
- * parameter if the value is not needed.  The base and size may be used for
- * mapping the memory window, to access the peer memory.  The alignment and
- * size may be used for translating the memory window, for the peer to access
- * memory on the local system.
+ * Hardware may support an access to memory of several remote domains
+ * over multi-port NTB devices. This method returns the number of peers,
+ * local device can have shared memory with.
  *
- * Return: Zero on success, otherwise an error number.
+ * Return: the number of peer ports
  */
-static inline int ntb_mw_get_range(struct ntb_dev *ntb, int idx,
-				   phys_addr_t *base, resource_size_t *size,
-		resource_size_t *align, resource_size_t *align_size)
+static inline int ntb_peer_port_count(struct ntb_dev *ntb)
 {
-	return ntb->ops->mw_get_range(ntb, idx, base, size,
-			align, align_size);
+	if (!ntb->ops->peer_port_count)
+		return ntb_default_peer_port_count(ntb);
+
+	return ntb->ops->peer_port_count(ntb);
 }
 
 /**
- * ntb_mw_set_trans() - set the translation of a memory window
+ * ntb_peer_port_number() - get the peer port by given index
  * @ntb:	NTB device context.
- * @idx:	Memory window number.
- * @addr:	The dma address local memory to expose to the peer.
- * @size:	The size of the local memory to expose to the peer.
+ * @pidx:	Peer port index.
  *
- * Set the translation of a memory window.  The peer may access local memory
- * through the window starting at the address, up to the size.  The address
- * must be aligned to the alignment specified by ntb_mw_get_range().  The size
- * must be aligned to the size alignment specified by ntb_mw_get_range().
+ * Peer ports are continuously enumerated by NTB API logic, so this method
+ * retrieves the real port number by its index.
  *
- * Return: Zero on success, otherwise an error number.
+ * Return: the peer device port or negative value indicating an error
  */
-static inline int ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
-				   dma_addr_t addr, resource_size_t size)
+static inline int ntb_peer_port_number(struct ntb_dev *ntb, int pidx)
 {
-	return ntb->ops->mw_set_trans(ntb, idx, addr, size);
+	if (!ntb->ops->peer_port_number)
+		return ntb_default_peer_port_number(ntb, pidx);
+
+	return ntb->ops->peer_port_number(ntb, pidx);
 }
 
 /**
- * ntb_mw_clear_trans() - clear the translation of a memory window
+ * ntb_peer_port_idx() - get the peer device port index by given port number
  * @ntb:	NTB device context.
- * @idx:	Memory window number.
+ * @port:	Peer port number.
  *
- * Clear the translation of a memory window.  The peer may no longer access
- * local memory through the window.
+ * Inverse operation of ntb_peer_port_number(), so one can get port index
+ * by specified port number.
  *
- * Return: Zero on success, otherwise an error number.
+ * Return: the peer port index or negative value indicating an error
  */
-static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx)
+static inline int ntb_peer_port_idx(struct ntb_dev *ntb, int port)
 {
-	if (!ntb->ops->mw_clear_trans)
-		return ntb->ops->mw_set_trans(ntb, idx, 0, 0);
+	if (!ntb->ops->peer_port_idx)
+		return ntb_default_peer_port_idx(ntb, port);
 
-	return ntb->ops->mw_clear_trans(ntb, idx);
+	return ntb->ops->peer_port_idx(ntb, port);
 }
 
 /**
@@ -526,25 +674,26 @@ static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx)
  * state once after every link event.  It is safe to query the link state in
  * the context of the link event callback.
  *
- * Return: One if the link is up, zero if the link is down, otherwise a
- *		negative value indicating the error number.
+ * Return: bitfield of indexed ports link state: bit is set/cleared if the
+ *         link is up/down respectively.
  */
-static inline int ntb_link_is_up(struct ntb_dev *ntb,
+static inline u64 ntb_link_is_up(struct ntb_dev *ntb,
 				 enum ntb_speed *speed, enum ntb_width *width)
 {
 	return ntb->ops->link_is_up(ntb, speed, width);
 }
 
 /**
- * ntb_link_enable() - enable the link on the secondary side of the ntb
+ * ntb_link_enable() - enable the local port ntb connection
  * @ntb:	NTB device context.
  * @max_speed:	The maximum link speed expressed as PCIe generation number.
  * @max_width:	The maximum link width expressed as the number of PCIe lanes.
  *
- * Enable the link on the secondary side of the ntb.  This can only be done
- * from the primary side of the ntb in primary or b2b topology.  The ntb device
- * should train the link to its maximum speed and width, or the requested speed
- * and width, whichever is smaller, if supported.
+ * Enable the NTB/PCIe link on the local or remote (for bridge-to-bridge
+ * topology) side of the bridge. If it's supported the ntb device should train
+ * the link to its maximum speed and width, or the requested speed and width,
+ * whichever is smaller. Some hardware doesn't support PCIe link training, so
+ * the last two arguments will be ignored then.
  *
  * Return: Zero on success, otherwise an error number.
  */
@@ -556,14 +705,14 @@ static inline int ntb_link_enable(struct ntb_dev *ntb,
 }
 
 /**
- * ntb_link_disable() - disable the link on the secondary side of the ntb
+ * ntb_link_disable() - disable the local port ntb connection
  * @ntb:	NTB device context.
  *
- * Disable the link on the secondary side of the ntb.  This can only be
- * done from the primary side of the ntb in primary or b2b topology.  The ntb
- * device should disable the link.  Returning from this call must indicate that
- * a barrier has passed, though with no more writes may pass in either
- * direction across the link, except if this call returns an error number.
+ * Disable the link on the local or remote (for b2b topology) side of the ntb.
+ * The ntb device should disable the link.  Returning from this call must
+ * indicate that a barrier has passed, though with no more writes may pass in
+ * either direction across the link, except if this call returns an error
+ * number.
  *
  * Return: Zero on success, otherwise an error number.
  */
@@ -573,6 +722,183 @@ static inline int ntb_link_disable(struct ntb_dev *ntb)
 }
 
 /**
+ * ntb_mw_count() - get the number of inbound memory windows, which could
+ *                  be created for a specified peer device
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
+ *
+ * Hardware and topology may support a different number of memory windows.
+ * Moreover different peer devices can support different number of memory
+ * windows. Simply speaking this method returns the number of possible inbound
+ * memory windows to share with specified peer device.
+ *
+ * Return: the number of memory windows.
+ */
+static inline int ntb_mw_count(struct ntb_dev *ntb, int pidx)
+{
+	return ntb->ops->mw_count(ntb, pidx);
+}
+
+/**
+ * ntb_mw_get_align() - get the restriction parameters of inbound memory window
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
+ * @addr_align:	OUT - the base alignment for translating the memory window
+ * @size_align:	OUT - the size alignment for translating the memory window
+ * @size_max:	OUT - the maximum size of the memory window
+ *
+ * Get the alignments of an inbound memory window with specified index.
+ * NULL may be given for any output parameter if the value is not needed.
+ * The alignment and size parameters may be used for allocation of proper
+ * shared memory.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int widx,
+				   resource_size_t *addr_align,
+				   resource_size_t *size_align,
+				   resource_size_t *size_max)
+{
+	return ntb->ops->mw_get_align(ntb, pidx, widx, addr_align, size_align,
+				      size_max);
+}
+
+/**
+ * ntb_mw_set_trans() - set the translation of an inbound memory window
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
+ * @addr:	The dma address of local memory to expose to the peer.
+ * @size:	The size of the local memory to expose to the peer.
+ *
+ * Set the translation of a memory window.  The peer may access local memory
+ * through the window starting at the address, up to the size.  The address
+ * and size must be aligned in compliance with restrictions of
+ * ntb_mw_get_align(). The region size should not exceed the size_max parameter
+ * of that method.
+ *
+ * This method may not be implemented due to the hardware specific memory
+ * windows interface.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static inline int ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
+				   dma_addr_t addr, resource_size_t size)
+{
+	if (!ntb->ops->mw_set_trans)
+		return 0;
+
+	return ntb->ops->mw_set_trans(ntb, pidx, widx, addr, size);
+}
+
+/**
+ * ntb_mw_clear_trans() - clear the translation address of an inbound memory
+ *                        window
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
+ *
+ * Clear the translation of an inbound memory window.  The peer may no longer
+ * access local memory through the window.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int pidx, int widx)
+{
+	if (!ntb->ops->mw_clear_trans)
+		return ntb_mw_set_trans(ntb, pidx, widx, 0, 0);
+
+	return ntb->ops->mw_clear_trans(ntb, pidx, widx);
+}
+
+/**
+ * ntb_peer_mw_count() - get the number of outbound memory windows, which could
+ *                       be mapped to access a shared memory
+ * @ntb:	NTB device context.
+ *
+ * Hardware and topology may support a different number of memory windows.
+ * This method returns the number of outbound memory windows supported by
+ * local device.
+ *
+ * Return: the number of memory windows.
+ */
+static inline int ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+	return ntb->ops->peer_mw_count(ntb);
+}
+
+/**
+ * ntb_peer_mw_get_addr() - get map address of an outbound memory window
+ * @ntb:	NTB device context.
+ * @widx:	Memory window index (within ntb_peer_mw_count() return value).
+ * @base:	OUT - the base address of mapping region.
+ * @size:	OUT - the size of mapping region.
+ *
+ * Get base and size of memory region to map.  NULL may be given for any output
+ * parameter if the value is not needed.  The base and size may be used for
+ * mapping the memory window, to access the peer memory.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_peer_mw_get_addr(struct ntb_dev *ntb, int widx,
+				      phys_addr_t *base, resource_size_t *size)
+{
+	return ntb->ops->peer_mw_get_addr(ntb, widx, base, size);
+}
+
+/**
+ * ntb_peer_mw_set_trans() - set a translation address of a memory window
+ *                           retrieved from a peer device
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device the translation address received from.
+ * @widx:	Memory window index.
+ * @addr:	The dma address of the shared memory to access.
+ * @size:	The size of the shared memory to access.
+ *
+ * Set the translation of an outbound memory window.  The local device may
+ * access shared memory allocated by the peer device that sent the address.
+ *
+ * This method may not be implemented due to the hardware specific memory
+ * windows interface, so a translation address can be only set on the side,
+ * where shared memory (inbound memory windows) is allocated.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static inline int ntb_peer_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
+					u64 addr, resource_size_t size)
+{
+	if (!ntb->ops->peer_mw_set_trans)
+		return 0;
+
+	return ntb->ops->peer_mw_set_trans(ntb, pidx, widx, addr, size);
+}
+
+/**
+ * ntb_peer_mw_clear_trans() - clear the translation address of an outbound
+ *                             memory window
+ * @ntb:	NTB device context.
+ * @pidx:	Port index of peer device.
+ * @widx:	Memory window index.
+ *
+ * Clear the translation of an outbound memory window.  The local device may no
+ * longer access a shared memory through the window.
+ *
+ * This method may not be implemented due to the hardware specific memory
+ * windows interface.
+ *
+ * Return: Zero on success, otherwise an error number.
+ */
+static inline int ntb_peer_mw_clear_trans(struct ntb_dev *ntb, int pidx,
+					  int widx)
+{
+	if (!ntb->ops->peer_mw_clear_trans)
+		return ntb_peer_mw_set_trans(ntb, pidx, widx, 0, 0);
+
+	return ntb->ops->peer_mw_clear_trans(ntb, pidx, widx);
+}
+
+/**
  * ntb_db_is_unsafe() - check if it is safe to use hardware doorbell
  * @ntb:	NTB device context.
  *
@@ -900,47 +1226,58 @@ static inline int ntb_spad_is_unsafe(struct ntb_dev *ntb)
  * @ntb:	NTB device context.
  *
  * Hardware and topology may support a different number of scratchpads.
+ * Although it must be the same for all ports per NTB device.
  *
  * Return: the number of scratchpads.
  */
 static inline int ntb_spad_count(struct ntb_dev *ntb)
 {
+	if (!ntb->ops->spad_count)
+		return 0;
+
 	return ntb->ops->spad_count(ntb);
 }
 
 /**
  * ntb_spad_read() - read the local scratchpad register
  * @ntb:	NTB device context.
- * @idx:	Scratchpad index.
+ * @sidx:	Scratchpad index.
  *
  * Read the local scratchpad register, and return the value.
  *
  * Return: The value of the local scratchpad register.
  */
-static inline u32 ntb_spad_read(struct ntb_dev *ntb, int idx)
+static inline u32 ntb_spad_read(struct ntb_dev *ntb, int sidx)
 {
-	return ntb->ops->spad_read(ntb, idx);
+	if (!ntb->ops->spad_read)
+		return ~(u32)0;
+
+	return ntb->ops->spad_read(ntb, sidx);
 }
 
 /**
  * ntb_spad_write() - write the local scratchpad register
  * @ntb:	NTB device context.
- * @idx:	Scratchpad index.
+ * @sidx:	Scratchpad index.
  * @val:	Scratchpad value.
  *
  * Write the value to the local scratchpad register.
  *
  * Return: Zero on success, otherwise an error number.
  */
-static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val)
+static inline int ntb_spad_write(struct ntb_dev *ntb, int sidx, u32 val)
 {
-	return ntb->ops->spad_write(ntb, idx, val);
+	if (!ntb->ops->spad_write)
+		return -EINVAL;
+
+	return ntb->ops->spad_write(ntb, sidx, val);
 }
 
 /**
  * ntb_peer_spad_addr() - address of the peer scratchpad register
  * @ntb:	NTB device context.
- * @idx:	Scratchpad index.
+ * @pidx:	Port index of peer device.
+ * @sidx:	Scratchpad index.
  * @spad_addr:	OUT - The address of the peer scratchpad register.
  *
  * Return the address of the peer doorbell register.  This may be used, for
@@ -948,45 +1285,213 @@ static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val)
  *
  * Return: Zero on success, otherwise an error number.
  */
-static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
+static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int pidx, int sidx,
 				     phys_addr_t *spad_addr)
 {
 	if (!ntb->ops->peer_spad_addr)
 		return -EINVAL;
 
-	return ntb->ops->peer_spad_addr(ntb, idx, spad_addr);
+	return ntb->ops->peer_spad_addr(ntb, pidx, sidx, spad_addr);
 }
 
 /**
  * ntb_peer_spad_read() - read the peer scratchpad register
  * @ntb:	NTB device context.
- * @idx:	Scratchpad index.
+ * @pidx:	Port index of peer device.
+ * @sidx:	Scratchpad index.
  *
  * Read the peer scratchpad register, and return the value.
  *
  * Return: The value of the local scratchpad register.
  */
-static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int pidx, int sidx)
 {
 	if (!ntb->ops->peer_spad_read)
-		return 0;
+		return ~(u32)0;
 
-	return ntb->ops->peer_spad_read(ntb, idx);
+	return ntb->ops->peer_spad_read(ntb, pidx, sidx);
 }
 
 /**
  * ntb_peer_spad_write() - write the peer scratchpad register
  * @ntb:	NTB device context.
- * @idx:	Scratchpad index.
+ * @pidx:	Port index of peer device.
+ * @sidx:	Scratchpad index.
  * @val:	Scratchpad value.
  *
  * Write the value to the peer scratchpad register.
  *
  * Return: Zero on success, otherwise an error number.
  */
-static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32 val)
+static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int pidx, int sidx,
+				      u32 val)
 {
-	return ntb->ops->peer_spad_write(ntb, idx, val);
+	if (!ntb->ops->peer_spad_write)
+		return -EINVAL;
+
+	return ntb->ops->peer_spad_write(ntb, pidx, sidx, val);
+}
+
+/**
+ * ntb_msg_count() - get the number of message registers
+ * @ntb:	NTB device context.
+ *
+ * Hardware may support a different number of message registers.
+ *
+ * Return: the number of message registers.
+ */
+static inline int ntb_msg_count(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->msg_count)
+		return 0;
+
+	return ntb->ops->msg_count(ntb);
+}
+
+/**
+ * ntb_msg_inbits() - get a bitfield of inbound message registers status
+ * @ntb:	NTB device context.
+ *
+ * The method returns the bitfield of status and mask registers, which are
+ * related to inbound message registers.
+ *
+ * Return: bitfield of inbound message registers.
+ */
+static inline u64 ntb_msg_inbits(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->msg_inbits)
+		return 0;
+
+	return ntb->ops->msg_inbits(ntb);
+}
+
+/**
+ * ntb_msg_outbits() - get a bitfield of outbound message registers status
+ * @ntb:	NTB device context.
+ *
+ * The method returns the bitfield of status and mask registers, which are
+ * related to outbound message registers.
+ *
+ * Return: bitfield of outbound message registers.
+ */
+static inline u64 ntb_msg_outbits(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->msg_outbits)
+		return 0;
+
+	return ntb->ops->msg_outbits(ntb);
+}
+
+/**
+ * ntb_msg_read_sts() - read the message registers status
+ * @ntb:	NTB device context.
+ *
+ * Read the status of message register. Inbound and outbound message registers
+ * related bits can be filtered by masks retrieved from ntb_msg_inbits() and
+ * ntb_msg_outbits().
+ *
+ * Return: status bits of message registers
+ */
+static inline u64 ntb_msg_read_sts(struct ntb_dev *ntb)
+{
+	if (!ntb->ops->msg_read_sts)
+		return 0;
+
+	return ntb->ops->msg_read_sts(ntb);
+}
+
+/**
+ * ntb_msg_clear_sts() - clear status bits of message registers
+ * @ntb:	NTB device context.
+ * @sts_bits:	Status bits to clear.
+ *
+ * Clear bits in the status register.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_clear_sts(struct ntb_dev *ntb, u64 sts_bits)
+{
+	if (!ntb->ops->msg_clear_sts)
+		return -EINVAL;
+
+	return ntb->ops->msg_clear_sts(ntb, sts_bits);
+}
+
+/**
+ * ntb_msg_set_mask() - set mask of message register status bits
+ * @ntb:	NTB device context.
+ * @mask_bits:	Mask bits.
+ *
+ * Mask the message registers status bits from raising the message event.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_set_mask(struct ntb_dev *ntb, u64 mask_bits)
+{
+	if (!ntb->ops->msg_set_mask)
+		return -EINVAL;
+
+	return ntb->ops->msg_set_mask(ntb, mask_bits);
+}
+
+/**
+ * ntb_msg_clear_mask() - clear message registers mask
+ * @ntb:	NTB device context.
+ * @mask_bits:	Mask bits to clear.
+ *
+ * Clear bits in the message events mask register.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_clear_mask(struct ntb_dev *ntb, u64 mask_bits)
+{
+	if (!ntb->ops->msg_clear_mask)
+		return -EINVAL;
+
+	return ntb->ops->msg_clear_mask(ntb, mask_bits);
+}
+
+/**
+ * ntb_msg_read() - read message register with specified index
+ * @ntb:	NTB device context.
+ * @midx:	Message register index
+ * @pidx:	OUT - Port index of peer device a message retrieved from
+ * @msg:	OUT - Data
+ *
+ * Read data from the specified message register. Source port index of a
+ * message is retrieved as well.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_read(struct ntb_dev *ntb, int midx, int *pidx,
+			       u32 *msg)
+{
+	if (!ntb->ops->msg_read)
+		return -EINVAL;
+
+	return ntb->ops->msg_read(ntb, midx, pidx, msg);
+}
+
+/**
+ * ntb_msg_write() - write data to the specified message register
+ * @ntb:	NTB device context.
+ * @midx:	Message register index
+ * @pidx:	Port index of peer device a message being sent to
+ * @msg:	Data to send
+ *
+ * Send data to a specified peer device using the defined message register.
+ * A message event can be raised if the midx register isn't empty while
+ * calling this method and the corresponding interrupt isn't masked.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+static inline int ntb_msg_write(struct ntb_dev *ntb, int midx, int pidx,
+				u32 msg)
+{
+	if (!ntb->ops->msg_write)
+		return -EINVAL;
+
+	return ntb->ops->msg_write(ntb, midx, pidx, msg);
 }
 
 #endif

diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h
index bc711a1..21c37e3 100644
--- a/include/linux/nvme-fc.h
+++ b/include/linux/nvme-fc.h

@@ -17,6 +17,7 @@
 
 /*
  * This file contains definitions relative to FC-NVME r1.14 (16-020vB).
+ * The fcnvme_lsdesc_cr_assoc_cmd struct reflects expected r1.16 content.
  */
 
 #ifndef _NVME_FC_H
@@ -193,9 +194,21 @@ struct fcnvme_lsdesc_cr_assoc_cmd {
 	uuid_t	hostid;
 	u8	hostnqn[FCNVME_ASSOC_HOSTNQN_LEN];
 	u8	subnqn[FCNVME_ASSOC_SUBNQN_LEN];
-	u8	rsvd632[384];
+	__be32	rsvd584[108];		/* pad to 1016 bytes,
+					 * which makes overall LS rqst
+					 * payload 1024 bytes
+					 */
 };
 
+#define FCNVME_LSDESC_CRA_CMD_DESC_MINLEN	\
+		offsetof(struct fcnvme_lsdesc_cr_assoc_cmd, rsvd584)
+
+#define FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN	\
+		(FCNVME_LSDESC_CRA_CMD_DESC_MINLEN - \
+		 offsetof(struct fcnvme_lsdesc_cr_assoc_cmd, ersp_ratio))
+
+
+
 /* FCNVME_LSDESC_CREATE_CONN_CMD */
 struct fcnvme_lsdesc_cr_conn_cmd {
 	__be32	desc_tag;		/* FCNVME_LSDESC_xxx */
@@ -273,6 +286,14 @@ struct fcnvme_ls_cr_assoc_rqst {
 	struct fcnvme_lsdesc_cr_assoc_cmd	assoc_cmd;
 };
 
+#define FCNVME_LSDESC_CRA_RQST_MINLEN	\
+		(offsetof(struct fcnvme_ls_cr_assoc_rqst, assoc_cmd) + \
+			FCNVME_LSDESC_CRA_CMD_DESC_MINLEN)
+
+#define FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN	\
+		FCNVME_LSDESC_CRA_CMD_DESC_MINLEN
+
+
 struct fcnvme_ls_cr_assoc_acc {
 	struct fcnvme_ls_acc_hdr		hdr;
 	struct fcnvme_lsdesc_assoc_id		associd;

diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index cd93416..497706f 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h

@@ -12,6 +12,9 @@
 #ifndef _LINUX_NVMEM_PROVIDER_H
 #define _LINUX_NVMEM_PROVIDER_H
 
+#include <linux/err.h>
+#include <linux/errno.h>
+
 struct nvmem_device;
 struct nvmem_cell_info;
 typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset,

diff --git a/include/linux/of.h b/include/linux/of.h
index fa089a2..4a8a709 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h

@@ -100,10 +100,12 @@ struct of_reconfig_data {
 
 /* initialize a node */
 extern struct kobj_type of_node_ktype;
+extern const struct fwnode_operations of_fwnode_ops;
 static inline void of_node_init(struct device_node *node)
 {
 	kobject_init(&node->kobj, &of_node_ktype);
 	node->fwnode.type = FWNODE_OF;
+	node->fwnode.ops = &of_fwnode_ops;
 }
 
 /* true when node is initialized */

diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 610e132..1fd7173 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h

@@ -174,6 +174,7 @@ static inline void page_ref_unfreeze(struct page *page, int count)
 	VM_BUG_ON_PAGE(page_count(page) != 0, page);
 	VM_BUG_ON(count == 0);
 
+	smp_mb();
 	atomic_set(&page->_refcount, count);
 	if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze))
 		__page_ref_unfreeze(page, count);

diff --git a/include/linux/i2c/i2c-hid.h b/include/linux/platform_data/i2c-hid.h
similarity index 100%
rename from include/linux/i2c/i2c-hid.h
rename to include/linux/platform_data/i2c-hid.h


diff --git a/include/linux/platform_data/usb-ohci-s3c2410.h b/include/linux/platform_data/usb-ohci-s3c2410.h
index 7fa1fbe..cc7554a 100644
--- a/include/linux/platform_data/usb-ohci-s3c2410.h
+++ b/include/linux/platform_data/usb-ohci-s3c2410.h

@@ -31,7 +31,7 @@ struct s3c2410_hcd_info {
 	void		(*report_oc)(struct s3c2410_hcd_info *, int ports);
 };
 
-static void inline s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports)
+static inline void s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports)
 {
 	if (info->report_oc != NULL) {
 		(info->report_oc)(info, ports);

diff --git a/include/linux/property.h b/include/linux/property.h
index 2f48261..7e77039 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h

@@ -51,6 +51,7 @@ int device_property_read_string(struct device *dev, const char *propname,
 int device_property_match_string(struct device *dev,
 				 const char *propname, const char *string);
 
+bool fwnode_device_is_available(struct fwnode_handle *fwnode);
 bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname);
 int fwnode_property_read_u8_array(struct fwnode_handle *fwnode,
 				  const char *propname, u8 *val,
@@ -274,12 +275,16 @@ void *device_get_mac_address(struct device *dev, char *addr, int alen);
 
 struct fwnode_handle *fwnode_graph_get_next_endpoint(
 	struct fwnode_handle *fwnode, struct fwnode_handle *prev);
+struct fwnode_handle *
+fwnode_graph_get_port_parent(struct fwnode_handle *fwnode);
 struct fwnode_handle *fwnode_graph_get_remote_port_parent(
 	struct fwnode_handle *fwnode);
 struct fwnode_handle *fwnode_graph_get_remote_port(
 	struct fwnode_handle *fwnode);
 struct fwnode_handle *fwnode_graph_get_remote_endpoint(
 	struct fwnode_handle *fwnode);
+struct fwnode_handle *fwnode_graph_get_remote_node(struct fwnode_handle *fwnode,
+						   u32 port, u32 endpoint);
 
 int fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode,
 				struct fwnode_endpoint *endpoint);

diff --git a/include/linux/random.h b/include/linux/random.h
index ed5c383..1fa0dc8 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h

@@ -57,6 +57,27 @@ static inline unsigned long get_random_long(void)
 #endif
 }
 
+/*
+ * On 64-bit architectures, protect against non-terminated C string overflows
+ * by zeroing out the first byte of the canary; this leaves 56 bits of entropy.
+ */
+#ifdef CONFIG_64BIT
+# ifdef __LITTLE_ENDIAN
+#  define CANARY_MASK 0xffffffffffffff00UL
+# else /* big endian, 64 bits: */
+#  define CANARY_MASK 0x00ffffffffffffffUL
+# endif
+#else /* 32 bits: */
+# define CANARY_MASK 0xffffffffUL
+#endif
+
+static inline unsigned long get_random_canary(void)
+{
+	unsigned long val = get_random_long();
+
+	return val & CANARY_MASK;
+}
+
 unsigned long randomize_page(unsigned long start, unsigned long range);
 
 u32 prandom_u32(void);

diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index b693ada..0a0f0d1 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h

@@ -14,6 +14,7 @@
 
 #include <linux/types.h>
 #include <linux/interrupt.h>
+#include <linux/nvmem-provider.h>
 #include <uapi/linux/rtc.h>
 
 extern int rtc_month_days(unsigned int month, unsigned int year);
@@ -32,17 +33,11 @@ static inline time64_t rtc_tm_sub(struct rtc_time *lhs, struct rtc_time *rhs)
 	return rtc_tm_to_time64(lhs) - rtc_tm_to_time64(rhs);
 }
 
-/**
- * Deprecated. Use rtc_time64_to_tm().
- */
 static inline void rtc_time_to_tm(unsigned long time, struct rtc_time *tm)
 {
 	rtc_time64_to_tm(time, tm);
 }
 
-/**
- * Deprecated. Use rtc_tm_to_time64().
- */
 static inline int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time)
 {
 	*time = rtc_tm_to_time64(tm);
@@ -116,7 +111,6 @@ struct rtc_device {
 	struct module *owner;
 
 	int id;
-	char name[RTC_DEVICE_NAME_SIZE];
 
 	const struct rtc_class_ops *ops;
 	struct mutex ops_lock;
@@ -143,6 +137,14 @@ struct rtc_device {
 	/* Some hardware can't support UIE mode */
 	int uie_unsupported;
 
+	bool registered;
+
+	struct nvmem_config *nvmem_config;
+	struct nvmem_device *nvmem;
+	/* Old ABI support */
+	bool nvram_old_abi;
+	struct bin_attribute *nvram;
+
 #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
 	struct work_struct uie_task;
 	struct timer_list uie_timer;
@@ -164,6 +166,8 @@ extern struct rtc_device *devm_rtc_device_register(struct device *dev,
 					const char *name,
 					const struct rtc_class_ops *ops,
 					struct module *owner);
+struct rtc_device *devm_rtc_allocate_device(struct device *dev);
+int __rtc_register_device(struct module *owner, struct rtc_device *rtc);
 extern void rtc_device_unregister(struct rtc_device *rtc);
 extern void devm_rtc_device_unregister(struct device *dev,
 					struct rtc_device *rtc);
@@ -219,6 +223,9 @@ static inline bool is_leap_year(unsigned int year)
 	return (!(year % 4) && (year % 100)) || !(year % 400);
 }
 
+#define rtc_register_device(device) \
+	__rtc_register_device(THIS_MODULE, device)
+
 #ifdef CONFIG_RTC_HCTOSYS_DEVICE
 extern int rtc_hctosys_ret;
 #else

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 20814b7..3822d74 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h

@@ -974,6 +974,7 @@ struct task_struct {
 
 #ifdef CONFIG_FAULT_INJECTION
 	int				make_it_fail;
+	int fail_nth;
 #endif
 	/*
 	 * When (nr_dirtied >= nr_dirtied_pause), it's time to call

diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 69eedce..98ae0d0 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h

@@ -68,7 +68,10 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_OOM_SKIP		21	/* mm is of no interest for the OOM killer */
 #define MMF_UNSTABLE		22	/* mm is unstable for copy_from_user */
 #define MMF_HUGE_ZERO_PAGE	23      /* mm has ever used the global huge zero page */
+#define MMF_DISABLE_THP		24	/* disable THP for all VMAs */
+#define MMF_DISABLE_THP_MASK	(1 << MMF_DISABLE_THP)
 
-#define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
+#define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
+				 MMF_DISABLE_THP_MASK)
 
 #endif /* _LINUX_SCHED_COREDUMP_H */

diff --git a/include/linux/sem.h b/include/linux/sem.h
index 9edec92..be5cf2e 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h

@@ -8,11 +8,29 @@
 
 struct task_struct;
 
+/* One semaphore structure for each semaphore in the system. */
+struct sem {
+	int	semval;		/* current value */
+	/*
+	 * PID of the process that last modified the semaphore. For
+	 * Linux, specifically these are:
+	 *  - semop
+	 *  - semctl, via SETVAL and SETALL.
+	 *  - at task exit when performing undo adjustments (see exit_sem).
+	 */
+	int	sempid;
+	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
+	struct list_head pending_alter; /* pending single-sop operations */
+					/* that alter the semaphore */
+	struct list_head pending_const; /* pending single-sop operations */
+					/* that do not alter the semaphore */
+	time_t	sem_otime;	/* candidate for sem_otime */
+} ____cacheline_aligned_in_smp;
+
 /* One sem_array data structure for each set of semaphores in the system. */
 struct sem_array {
 	struct kern_ipc_perm	sem_perm;	/* permissions .. see ipc.h */
-	time_t			sem_ctime;	/* last change time */
-	struct sem		*sem_base;	/* ptr to first semaphore in array */
+	time_t			sem_ctime;	/* create/last semctl() time */
 	struct list_head	pending_alter;	/* pending operations */
 						/* that alter the array */
 	struct list_head	pending_const;	/* pending complex operations */
@@ -21,6 +39,8 @@ struct sem_array {
 	int			sem_nsems;	/* no. of semaphores in array */
 	int			complex_count;	/* pending complex operations */
 	unsigned int		use_global_lock;/* >0: global lock required */
+
+	struct sem		sems[];
 };
 
 #ifdef CONFIG_SYSVIPC

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 04a7f79..41473df 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h

@@ -471,7 +471,8 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
  *
  * %__GFP_NOWARN - If allocation fails, don't issue any warnings.
  *
- * %__GFP_REPEAT - If allocation fails initially, try once more before failing.
+ * %__GFP_RETRY_MAYFAIL - Try really hard to make the allocation succeed, but
+ *   fail eventually.
  *
  * There are other flags available as well, but these are not intended
  * for general use, and so are not documented here. For a full list of

diff --git a/include/linux/string.h b/include/linux/string.h
index 7439d83..96f5a5f 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h

@@ -193,4 +193,204 @@ static inline const char *kbasename(const char *path)
 	return tail ? tail + 1 : path;
 }
 
+#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline))
+#define __RENAME(x) __asm__(#x)
+
+void fortify_panic(const char *name) __noreturn __cold;
+void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter");
+void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter");
+void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter");
+
+#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
+__FORTIFY_INLINE char *strcpy(char *p, const char *q)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (p_size == (size_t)-1 && q_size == (size_t)-1)
+		return __builtin_strcpy(p, q);
+	if (strscpy(p, q, p_size < q_size ? p_size : q_size) < 0)
+		fortify_panic(__func__);
+	return p;
+}
+
+__FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__write_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __builtin_strncpy(p, q, size);
+}
+
+__FORTIFY_INLINE char *strcat(char *p, const char *q)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (p_size == (size_t)-1)
+		return __builtin_strcat(p, q);
+	if (strlcat(p, q, p_size) >= p_size)
+		fortify_panic(__func__);
+	return p;
+}
+
+__FORTIFY_INLINE __kernel_size_t strlen(const char *p)
+{
+	__kernel_size_t ret;
+	size_t p_size = __builtin_object_size(p, 0);
+	if (p_size == (size_t)-1)
+		return __builtin_strlen(p);
+	ret = strnlen(p, p_size);
+	if (p_size <= ret)
+		fortify_panic(__func__);
+	return ret;
+}
+
+extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
+__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	__kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
+	if (p_size <= ret && maxlen != ret)
+		fortify_panic(__func__);
+	return ret;
+}
+
+/* defined after fortified strlen to reuse it */
+extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
+__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size)
+{
+	size_t ret;
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (p_size == (size_t)-1 && q_size == (size_t)-1)
+		return __real_strlcpy(p, q, size);
+	ret = strlen(q);
+	if (size) {
+		size_t len = (ret >= size) ? size - 1 : ret;
+		if (__builtin_constant_p(len) && len >= p_size)
+			__write_overflow();
+		if (len >= p_size)
+			fortify_panic(__func__);
+		__builtin_memcpy(p, q, len);
+		p[len] = '\0';
+	}
+	return ret;
+}
+
+/* defined after fortified strlen and strnlen to reuse them */
+__FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count)
+{
+	size_t p_len, copy_len;
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (p_size == (size_t)-1 && q_size == (size_t)-1)
+		return __builtin_strncat(p, q, count);
+	p_len = strlen(p);
+	copy_len = strnlen(q, count);
+	if (p_size < p_len + copy_len + 1)
+		fortify_panic(__func__);
+	__builtin_memcpy(p + p_len, q, copy_len);
+	p[p_len + copy_len] = '\0';
+	return p;
+}
+
+__FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__write_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __builtin_memset(p, c, size);
+}
+
+__FORTIFY_INLINE void *memcpy(void *p, const void *q, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (__builtin_constant_p(size)) {
+		if (p_size < size)
+			__write_overflow();
+		if (q_size < size)
+			__read_overflow2();
+	}
+	if (p_size < size || q_size < size)
+		fortify_panic(__func__);
+	return __builtin_memcpy(p, q, size);
+}
+
+__FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (__builtin_constant_p(size)) {
+		if (p_size < size)
+			__write_overflow();
+		if (q_size < size)
+			__read_overflow2();
+	}
+	if (p_size < size || q_size < size)
+		fortify_panic(__func__);
+	return __builtin_memmove(p, q, size);
+}
+
+extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan);
+__FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__read_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __real_memscan(p, c, size);
+}
+
+__FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	size_t q_size = __builtin_object_size(q, 0);
+	if (__builtin_constant_p(size)) {
+		if (p_size < size)
+			__read_overflow();
+		if (q_size < size)
+			__read_overflow2();
+	}
+	if (p_size < size || q_size < size)
+		fortify_panic(__func__);
+	return __builtin_memcmp(p, q, size);
+}
+
+__FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__read_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __builtin_memchr(p, c, size);
+}
+
+void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv);
+__FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__read_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __real_memchr_inv(p, c, size);
+}
+
+extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup);
+__FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp)
+{
+	size_t p_size = __builtin_object_size(p, 0);
+	if (__builtin_constant_p(size) && p_size < size)
+		__read_overflow();
+	if (p_size < size)
+		fortify_panic(__func__);
+	return __real_kmemdup(p, size, gfp);
+}
+#endif
+
 #endif /* _LINUX_STRING_H_ */

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 6095ecb..55ef67b 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h

@@ -39,7 +39,7 @@ struct rpc_clnt {
 	struct list_head	cl_tasks;	/* List of tasks */
 	spinlock_t		cl_lock;	/* spinlock */
 	struct rpc_xprt __rcu *	cl_xprt;	/* transport */
-	struct rpc_procinfo *	cl_procinfo;	/* procedure info */
+	const struct rpc_procinfo *cl_procinfo;	/* procedure info */
 	u32			cl_prog,	/* RPC program number */
 				cl_vers,	/* RPC version number */
 				cl_maxproc;	/* max procedure number */
@@ -87,7 +87,8 @@ struct rpc_program {
 struct rpc_version {
 	u32			number;		/* version number */
 	unsigned int		nrprocs;	/* number of procs */
-	struct rpc_procinfo *	procs;		/* procedure array */
+	const struct rpc_procinfo *procs;	/* procedure array */
+	unsigned int		*counts;	/* call counts */
 };
 
 /*
@@ -99,7 +100,6 @@ struct rpc_procinfo {
 	kxdrdproc_t		p_decode;	/* XDR decode function */
 	unsigned int		p_arglen;	/* argument hdr length (u32) */
 	unsigned int		p_replen;	/* reply hdr length (u32) */
-	unsigned int		p_count;	/* call count */
 	unsigned int		p_timer;	/* Which RTT timer to use */
 	u32			p_statidx;	/* Which procedure to account */
 	const char *		p_name;		/* name of procedure */

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 9d7529f..50a99a1 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h

@@ -22,7 +22,7 @@
  */
 struct rpc_procinfo;
 struct rpc_message {
-	struct rpc_procinfo *	rpc_proc;	/* Procedure information */
+	const struct rpc_procinfo *rpc_proc;	/* Procedure information */
 	void *			rpc_argp;	/* Arguments */
 	void *			rpc_resp;	/* Result */
 	struct rpc_cred *	rpc_cred;	/* Credentials */

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 11cef5a..a3f8af9 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h

@@ -237,7 +237,7 @@ struct svc_rqst {
 
 	struct svc_serv *	rq_server;	/* RPC service definition */
 	struct svc_pool *	rq_pool;	/* thread pool */
-	struct svc_procedure *	rq_procinfo;	/* procedure info */
+	const struct svc_procedure *rq_procinfo;/* procedure info */
 	struct auth_ops *	rq_authop;	/* authentication flavour */
 	struct svc_cred		rq_cred;	/* auth info */
 	void *			rq_xprt_ctxt;	/* transport specific context ptr */
@@ -246,7 +246,7 @@ struct svc_rqst {
 	size_t			rq_xprt_hlen;	/* xprt header len */
 	struct xdr_buf		rq_arg;
 	struct xdr_buf		rq_res;
-	struct page *		rq_pages[RPCSVC_MAXPAGES];
+	struct page		*rq_pages[RPCSVC_MAXPAGES + 1];
 	struct page *		*rq_respages;	/* points into rq_pages */
 	struct page *		*rq_next_page; /* next reply page to use */
 	struct page *		*rq_page_end;  /* one past the last page */
@@ -384,7 +384,7 @@ struct svc_program {
 	unsigned int		pg_lovers;	/* lowest version */
 	unsigned int		pg_hivers;	/* highest version */
 	unsigned int		pg_nvers;	/* number of versions */
-	struct svc_version **	pg_vers;	/* version array */
+	const struct svc_version **pg_vers;	/* version array */
 	char *			pg_name;	/* service name */
 	char *			pg_class;	/* class name: services sharing authentication */
 	struct svc_stat *	pg_stats;	/* rpc statistics */
@@ -397,7 +397,8 @@ struct svc_program {
 struct svc_version {
 	u32			vs_vers;	/* version number */
 	u32			vs_nproc;	/* number of procedures */
-	struct svc_procedure *	vs_proc;	/* per-procedure info */
+	const struct svc_procedure *vs_proc;	/* per-procedure info */
+	unsigned int		*vs_count;	/* call counts */
 	u32			vs_xdrsize;	/* xdrsize needed for this version */
 
 	/* Don't register with rpcbind */
@@ -419,15 +420,17 @@ struct svc_version {
 /*
  * RPC procedure info
  */
-typedef __be32	(*svc_procfunc)(struct svc_rqst *, void *argp, void *resp);
 struct svc_procedure {
-	svc_procfunc		pc_func;	/* process the request */
-	kxdrproc_t		pc_decode;	/* XDR decode args */
-	kxdrproc_t		pc_encode;	/* XDR encode result */
-	kxdrproc_t		pc_release;	/* XDR free result */
+	/* process the request: */
+	__be32			(*pc_func)(struct svc_rqst *);
+	/* XDR decode args: */
+	int			(*pc_decode)(struct svc_rqst *, __be32 *data);
+	/* XDR encode result: */
+	int			(*pc_encode)(struct svc_rqst *, __be32 *data);
+	/* XDR free result: */
+	void			(*pc_release)(struct svc_rqst *);
 	unsigned int		pc_argsize;	/* argument struct size */
 	unsigned int		pc_ressize;	/* result struct size */
-	unsigned int		pc_count;	/* call count */
 	unsigned int		pc_cachetype;	/* cache info (NFS) */
 	unsigned int		pc_xdrressize;	/* maximum size of XDR reply */
 };

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index f3787d8..995c6fe 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h

@@ -77,46 +77,25 @@ extern atomic_t rdma_stat_sq_prod;
  */
 struct svc_rdma_op_ctxt {
 	struct list_head list;
-	struct svc_rdma_op_ctxt *read_hdr;
-	struct svc_rdma_fastreg_mr *frmr;
-	int hdr_count;
 	struct xdr_buf arg;
 	struct ib_cqe cqe;
-	struct ib_cqe reg_cqe;
-	struct ib_cqe inv_cqe;
 	u32 byte_len;
-	u32 position;
 	struct svcxprt_rdma *xprt;
-	unsigned long flags;
 	enum dma_data_direction direction;
 	int count;
 	unsigned int mapped_sges;
+	int hdr_count;
 	struct ib_send_wr send_wr;
 	struct ib_sge sge[1 + RPCRDMA_MAX_INLINE_THRESH / PAGE_SIZE];
 	struct page *pages[RPCSVC_MAXPAGES];
 };
 
-struct svc_rdma_fastreg_mr {
-	struct ib_mr *mr;
-	struct scatterlist *sg;
-	int sg_nents;
-	unsigned long access_flags;
-	enum dma_data_direction direction;
-	struct list_head frmr_list;
-};
-
-#define RDMACTXT_F_LAST_CTXT	2
-
-#define	SVCRDMA_DEVCAP_FAST_REG		1	/* fast mr registration */
-#define	SVCRDMA_DEVCAP_READ_W_INV	2	/* read w/ invalidate */
-
 struct svcxprt_rdma {
 	struct svc_xprt      sc_xprt;		/* SVC transport structure */
 	struct rdma_cm_id    *sc_cm_id;		/* RDMA connection id */
 	struct list_head     sc_accept_q;	/* Conn. waiting accept */
 	int		     sc_ord;		/* RDMA read limit */
 	int                  sc_max_sge;
-	int                  sc_max_sge_rd;	/* max sge for read target */
 	bool		     sc_snd_w_inv;	/* OK to use Send With Invalidate */
 
 	atomic_t             sc_sq_avail;	/* SQEs ready to be consumed */
@@ -141,14 +120,6 @@ struct svcxprt_rdma {
 	struct ib_qp         *sc_qp;
 	struct ib_cq         *sc_rq_cq;
 	struct ib_cq         *sc_sq_cq;
-	int		     (*sc_reader)(struct svcxprt_rdma *,
-					  struct svc_rqst *,
-					  struct svc_rdma_op_ctxt *,
-					  int *, u32 *, u32, u32, u64, bool);
-	u32		     sc_dev_caps;	/* distilled device caps */
-	unsigned int	     sc_frmr_pg_list_len;
-	struct list_head     sc_frmr_q;
-	spinlock_t	     sc_frmr_q_lock;
 
 	spinlock_t	     sc_lock;		/* transport lock */
 
@@ -185,20 +156,14 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
 				    __be32 *rdma_resp,
 				    struct xdr_buf *rcvbuf);
 
-/* svc_rdma_marshal.c */
-extern int svc_rdma_xdr_decode_req(struct xdr_buf *);
-
 /* svc_rdma_recvfrom.c */
 extern int svc_rdma_recvfrom(struct svc_rqst *);
-extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *,
-			       struct svc_rdma_op_ctxt *, int *, u32 *,
-			       u32, u32, u64, bool);
-extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
-				struct svc_rdma_op_ctxt *, int *, u32 *,
-				u32, u32, u64, bool);
 
 /* svc_rdma_rw.c */
 extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
+extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma,
+				    struct svc_rqst *rqstp,
+				    struct svc_rdma_op_ctxt *head, __be32 *p);
 extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
 				     __be32 *wr_ch, struct xdr_buf *xdr);
 extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
@@ -226,9 +191,6 @@ extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
 extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
 extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
 extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
-extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
-extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
-			      struct svc_rdma_fastreg_mr *);
 extern void svc_sq_reap(struct svcxprt_rdma *);
 extern void svc_rq_reap(struct svcxprt_rdma *);
 extern void svc_rdma_prep_reply_hdr(struct svc_rqst *);

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 054c8cd..261b48a 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h

@@ -17,6 +17,8 @@
 #include <asm/unaligned.h>
 #include <linux/scatterlist.h>
 
+struct rpc_rqst;
+
 /*
  * Buffer adjustment
  */
@@ -33,13 +35,6 @@ struct xdr_netobj {
 };
 
 /*
- * This is the legacy generic XDR function. rqstp is either a rpc_rqst
- * (client side) or svc_rqst pointer (server side).
- * Encode functions always assume there's enough room in the buffer.
- */
-typedef int	(*kxdrproc_t)(void *rqstp, __be32 *data, void *obj);
-
-/*
  * Basic structure for transmission/reception of a client XDR message.
  * Features a header (for a linear buffer containing RPC headers
  * and the data payload for short messages), and then an array of
@@ -222,8 +217,10 @@ struct xdr_stream {
 /*
  * These are the xdr_stream style generic XDR encode and decode functions.
  */
-typedef void	(*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
-typedef int	(*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+typedef void	(*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		const void *obj);
+typedef int	(*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		void *obj);
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 5ab1c98..d83d28e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h

@@ -277,6 +277,7 @@ extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
 extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_all(void);
+extern void lru_add_drain_all_cpuslocked(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void deactivate_file_page(struct page *page);
 extern void mark_page_lazyfree(struct page *page);
@@ -331,7 +332,7 @@ extern void kswapd_stop(int nid);
 #include <linux/blk_types.h> /* for bio_end_io_t */
 
 /* linux/mm/page_io.c */
-extern int swap_readpage(struct page *);
+extern int swap_readpage(struct page *page, bool do_poll);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
 extern void end_swap_bio_write(struct bio *bio);
 extern int __swap_writepage(struct page *page, struct writeback_control *wbc,
@@ -362,7 +363,8 @@ extern void free_page_and_swap_cache(struct page *);
 extern void free_pages_and_swap_cache(struct page **, int);
 extern struct page *lookup_swap_cache(swp_entry_t);
 extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
-			struct vm_area_struct *vma, unsigned long addr);
+			struct vm_area_struct *vma, unsigned long addr,
+			bool do_poll);
 extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
 			struct vm_area_struct *vma, unsigned long addr,
 			bool *new_page_allocated);

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 5c3a5f3..c5ff7b2 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h

@@ -196,15 +196,6 @@ static inline void num_poisoned_pages_dec(void)
 	atomic_long_dec(&num_poisoned_pages);
 }
 
-static inline void num_poisoned_pages_add(long num)
-{
-	atomic_long_add(num, &num_poisoned_pages);
-}
-
-static inline void num_poisoned_pages_sub(long num)
-{
-	atomic_long_sub(num, &num_poisoned_pages);
-}
 #else
 
 static inline swp_entry_t make_hwpoison_entry(struct page *page)

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 80d0781..3a89b9f 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h

@@ -47,6 +47,9 @@ extern int proc_douintvec(struct ctl_table *, int,
 			 void __user *, size_t *, loff_t *);
 extern int proc_dointvec_minmax(struct ctl_table *, int,
 				void __user *, size_t *, loff_t *);
+extern int proc_douintvec_minmax(struct ctl_table *table, int write,
+				 void __user *buffer, size_t *lenp,
+				 loff_t *ppos);
 extern int proc_dointvec_jiffies(struct ctl_table *, int,
 				 void __user *, size_t *, loff_t *);
 extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int,
@@ -143,7 +146,7 @@ struct ctl_table_header
 	struct ctl_table_set *set;
 	struct ctl_dir *parent;
 	struct ctl_node *node;
-	struct list_head inodes; /* head for proc_inode->sysctl_inodes */
+	struct hlist_head inodes; /* head for proc_inode->sysctl_inodes */
 };
 
 struct ctl_dir {

diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 9375d23..635a3c5 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h

@@ -33,6 +33,8 @@ struct t10_pi_tuple {
 	__be32 ref_tag;		/* Target LBA or indirect LBA */
 };
 
+#define T10_PI_APP_ESCAPE cpu_to_be16(0xffff)
+#define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff)
 
 extern const struct blk_integrity_profile t10_pi_type1_crc;
 extern const struct blk_integrity_profile t10_pi_type1_ip;

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 342d2dc..8a642cd 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h

@@ -103,7 +103,7 @@ static __always_inline __must_check
 size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(!check_copy_size(addr, bytes, true)))
-		return bytes;
+		return 0;
 	else
 		return _copy_to_iter(addr, bytes, i);
 }
@@ -112,7 +112,7 @@ static __always_inline __must_check
 size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(!check_copy_size(addr, bytes, false)))
-		return bytes;
+		return 0;
 	else
 		return _copy_from_iter(addr, bytes, i);
 }
@@ -130,7 +130,7 @@ static __always_inline __must_check
 size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(!check_copy_size(addr, bytes, false)))
-		return bytes;
+		return 0;
 	else
 		return _copy_from_iter_nocache(addr, bytes, i);
 }
@@ -160,7 +160,7 @@ static __always_inline __must_check
 size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(!check_copy_size(addr, bytes, false)))
-		return bytes;
+		return 0;
 	else
 		return _copy_from_iter_flushcache(addr, bytes, i);
 }

diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index f57076b..586809a 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h

@@ -97,6 +97,8 @@ extern void vfio_unregister_iommu_driver(
  */
 extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
 extern void vfio_group_put_external_user(struct vfio_group *group);
+extern bool vfio_external_group_match_file(struct vfio_group *group,
+					   struct file *filep);
 extern int vfio_external_user_iommu_id(struct vfio_group *group);
 extern long vfio_external_check_extension(struct vfio_group *group,
 					  unsigned long arg);

diff --git a/include/net/sock.h b/include/net/sock.h
index 8c85791f..f69c8c2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h

@@ -246,6 +246,7 @@ struct sock_common {
   *	@sk_policy: flow policy
   *	@sk_receive_queue: incoming packets
   *	@sk_wmem_alloc: transmit queue bytes committed
+  *	@sk_tsq_flags: TCP Small Queues flags
   *	@sk_write_queue: Packet sending queue
   *	@sk_omem_alloc: "o" is "option" or "other"
   *	@sk_wmem_queued: persistent queue size
@@ -257,6 +258,7 @@ struct sock_common {
   *	@sk_pacing_status: Pacing status (requested, handled by sch_fq)
   *	@sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE)
   *	@sk_sndbuf: size of send buffer in bytes
+  *	@__sk_flags_offset: empty field used to determine location of bitfield
   *	@sk_padding: unused element for alignment
   *	@sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets
   *	@sk_no_check_rx: allow zero checksum in RX packets
@@ -277,6 +279,7 @@ struct sock_common {
   *	@sk_drops: raw/udp drops counter
   *	@sk_ack_backlog: current listen backlog
   *	@sk_max_ack_backlog: listen backlog set in listen()
+  *	@sk_uid: user id of owner
   *	@sk_priority: %SO_PRIORITY setting
   *	@sk_type: socket type (%SOCK_STREAM, etc)
   *	@sk_protocol: which protocol this socket belongs in this network family

diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h
index 8260700..8c285d9 100644
--- a/include/scsi/scsi_proto.h
+++ b/include/scsi/scsi_proto.h

@@ -158,6 +158,7 @@
 #define READ_32		      0x09
 #define VERIFY_32	      0x0a
 #define WRITE_32	      0x0b
+#define WRITE_VERIFY_32	      0x0c
 #define WRITE_SAME_32	      0x0d
 #define ATA_32		      0x1ff0
 

diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index 5f17fb7..0ca1fb0 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h

@@ -66,6 +66,14 @@ struct sock;
 #define TA_DEFAULT_FABRIC_PROT_TYPE	0
 /* TPG status needs to be enabled to return sendtargets discovery endpoint info */
 #define TA_DEFAULT_TPG_ENABLED_SENDTARGETS 1
+/*
+ * Used to control the sending of keys with optional to respond state bit,
+ * as a workaround for non RFC compliant initiators, that do not propose,
+ * nor respond to specific keys required for login to complete.
+ *
+ * See iscsi_check_proposer_for_optional_reply() for more details.
+ */
+#define TA_DEFAULT_LOGIN_KEYS_WORKAROUND 1
 
 #define ISCSI_IOV_DATA_BUFFER		5
 
@@ -560,7 +568,6 @@ struct iscsi_conn {
 #define LOGIN_FLAGS_INITIAL_PDU		8
 	unsigned long		login_flags;
 	struct delayed_work	login_work;
-	struct delayed_work	login_cleanup_work;
 	struct iscsi_login	*login;
 	struct timer_list	nopin_timer;
 	struct timer_list	nopin_response_timer;
@@ -769,6 +776,7 @@ struct iscsi_tpg_attrib {
 	u8			t10_pi;
 	u32			fabric_prot_type;
 	u32			tpg_enabled_sendtargets;
+	u32			login_keys_workaround;
 	struct iscsi_portal_group *tpg;
 };
 

diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index e475531..e150e39 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h

@@ -2,6 +2,7 @@
 #define TARGET_CORE_BACKEND_H
 
 #include <linux/types.h>
+#include <asm/unaligned.h>
 #include <target/target_core_base.h>
 
 #define TRANSPORT_FLAG_PASSTHROUGH		0x1
@@ -29,16 +30,13 @@ struct target_backend_ops {
 
 	struct se_device *(*alloc_device)(struct se_hba *, const char *);
 	int (*configure_device)(struct se_device *);
+	void (*destroy_device)(struct se_device *);
 	void (*free_device)(struct se_device *device);
 
 	ssize_t (*set_configfs_dev_params)(struct se_device *,
 					   const char *, ssize_t);
 	ssize_t (*show_configfs_dev_params)(struct se_device *, char *);
 
-	void (*transport_complete)(struct se_cmd *cmd,
-				   struct scatterlist *,
-				   unsigned char *);
-
 	sense_reason_t (*parse_cdb)(struct se_cmd *cmd);
 	u32 (*get_device_type)(struct se_device *);
 	sector_t (*get_blocks)(struct se_device *);
@@ -71,6 +69,8 @@ void	target_backend_unregister(const struct target_backend_ops *);
 void	target_complete_cmd(struct se_cmd *, u8);
 void	target_complete_cmd_with_length(struct se_cmd *, u8, int);
 
+void	transport_copy_sense_to_cmd(struct se_cmd *, unsigned char *);
+
 sense_reason_t	spc_parse_cdb(struct se_cmd *cmd, unsigned int *size);
 sense_reason_t	spc_emulate_report_luns(struct se_cmd *cmd);
 sense_reason_t	spc_emulate_inquiry_std(struct se_cmd *, unsigned char *);
@@ -104,9 +104,18 @@ bool	target_lun_is_rdonly(struct se_cmd *);
 sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd,
 	sense_reason_t (*exec_cmd)(struct se_cmd *cmd));
 
+struct	se_device *target_find_device(int id, bool do_depend);
+
 bool target_sense_desc_format(struct se_device *dev);
 sector_t target_to_linux_sector(struct se_device *dev, sector_t lb);
 bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
 				       struct request_queue *q);
 
+
+/* Only use get_unaligned_be24() if reading p - 1 is allowed. */
+static inline uint32_t get_unaligned_be24(const uint8_t *const p)
+{
+	return get_unaligned_be32(p - 1) & 0xffffffU;
+}
+
 #endif /* TARGET_CORE_BACKEND_H */

diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 0c1dce2..516764f 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h

@@ -188,7 +188,8 @@ enum target_sc_flags_table {
 	TARGET_SCF_BIDI_OP		= 0x01,
 	TARGET_SCF_ACK_KREF		= 0x02,
 	TARGET_SCF_UNKNOWN_SIZE		= 0x04,
-	TARGET_SCF_USE_CPUID	= 0x08,
+	TARGET_SCF_USE_CPUID		= 0x08,
+	TARGET_SCF_LOOKUP_LUN_FROM_TAG	= 0x10,
 };
 
 /* fabric independent task management function values */
@@ -218,7 +219,6 @@ enum tcm_tmrsp_table {
  */
 typedef enum {
 	SCSI_INST_INDEX,
-	SCSI_DEVICE_INDEX,
 	SCSI_AUTH_INTR_INDEX,
 	SCSI_INDEX_TYPE_MAX
 } scsi_index_t;
@@ -701,8 +701,6 @@ struct scsi_port_stats {
 
 struct se_lun {
 	u64			unpacked_lun;
-#define SE_LUN_LINK_MAGIC			0xffff7771
-	u32			lun_link_magic;
 	bool			lun_shutdown;
 	bool			lun_access_ro;
 	u32			lun_index;
@@ -746,8 +744,6 @@ struct se_dev_stat_grps {
 };
 
 struct se_device {
-#define SE_DEV_LINK_MAGIC			0xfeeddeef
-	u32			dev_link_magic;
 	/* RELATIVE TARGET PORT IDENTIFER Counter */
 	u16			dev_rpti_counter;
 	/* Used for SAM Task Attribute ordering */
@@ -800,7 +796,6 @@ struct se_device {
 	struct list_head	delayed_cmd_list;
 	struct list_head	state_list;
 	struct list_head	qf_cmd_list;
-	struct list_head	g_dev_node;
 	/* Pointer to associated SE HBA */
 	struct se_hba		*se_hba;
 	/* T10 Inquiry and VPD WWN Information */
@@ -819,8 +814,6 @@ struct se_device {
 	unsigned char		udev_path[SE_UDEV_PATH_LEN];
 	/* Pointer to template of function pointers for transport */
 	const struct target_backend_ops *transport;
-	/* Linked list for struct se_hba struct se_device list */
-	struct list_head	dev_list;
 	struct se_lun		xcopy_lun;
 	/* Protection Information */
 	int			prot_length;

diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index d7dd142..33d2e3e 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h

@@ -160,6 +160,7 @@ int	target_get_sess_cmd(struct se_cmd *, bool);
 int	target_put_sess_cmd(struct se_cmd *);
 void	target_sess_cmd_list_set_waiting(struct se_session *);
 void	target_wait_for_sess_cmds(struct se_session *);
+void	target_show_cmd(const char *pfx, struct se_cmd *cmd);
 
 int	core_alua_check_nonop_delay(struct se_cmd *);
 

diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 15da88c..6f77a27 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h

@@ -19,6 +19,9 @@ TRACE_DEFINE_ENUM(INMEM_INVALIDATE);
 TRACE_DEFINE_ENUM(INMEM_REVOKE);
 TRACE_DEFINE_ENUM(IPU);
 TRACE_DEFINE_ENUM(OPU);
+TRACE_DEFINE_ENUM(HOT);
+TRACE_DEFINE_ENUM(WARM);
+TRACE_DEFINE_ENUM(COLD);
 TRACE_DEFINE_ENUM(CURSEG_HOT_DATA);
 TRACE_DEFINE_ENUM(CURSEG_WARM_DATA);
 TRACE_DEFINE_ENUM(CURSEG_COLD_DATA);
@@ -59,6 +62,12 @@ TRACE_DEFINE_ENUM(CP_TRIMMED);
 		{ IPU,		"IN-PLACE" },				\
 		{ OPU,		"OUT-OF-PLACE" })
 
+#define show_block_temp(temp)						\
+	__print_symbolic(temp,						\
+		{ HOT,		"HOT" },				\
+		{ WARM,		"WARM" },				\
+		{ COLD,		"COLD" })
+
 #define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_META | REQ_PRIO |	\
 			REQ_PREFLUSH | REQ_FUA)
 #define F2FS_BIO_FLAG_MASK(t)	(t & F2FS_OP_FLAGS)
@@ -757,6 +766,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
 		__field(block_t, new_blkaddr)
 		__field(int, op)
 		__field(int, op_flags)
+		__field(int, temp)
 		__field(int, type)
 	),
 
@@ -768,16 +778,18 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
 		__entry->new_blkaddr	= fio->new_blkaddr;
 		__entry->op		= fio->op;
 		__entry->op_flags	= fio->op_flags;
+		__entry->temp		= fio->temp;
 		__entry->type		= fio->type;
 	),
 
 	TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, "
-		"oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s",
+		"oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s_%s",
 		show_dev_ino(__entry),
 		(unsigned long)__entry->index,
 		(unsigned long long)__entry->old_blkaddr,
 		(unsigned long long)__entry->new_blkaddr,
 		show_bio_type(__entry->op, __entry->op_flags),
+		show_block_temp(__entry->temp),
 		show_block_type(__entry->type))
 );
 
@@ -790,7 +802,7 @@ DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio,
 	TP_CONDITION(page->mapping)
 );
 
-DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio,
+DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_write,
 
 	TP_PROTO(struct page *page, struct f2fs_io_info *fio),
 

diff --git a/include/trace/events/i2c.h b/include/trace/events/i2c.h
index 4abb8ea..86a4011 100644
--- a/include/trace/events/i2c.h
+++ b/include/trace/events/i2c.h

@@ -1,4 +1,4 @@
-/* I2C and SMBUS message transfer tracepoints
+/* I2C message transfer tracepoints
  *
  * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -18,7 +18,7 @@
 #include <linux/tracepoint.h>
 
 /*
- * drivers/i2c/i2c-core.c
+ * drivers/i2c/i2c-core-base.c
  */
 extern int i2c_transfer_trace_reg(void);
 extern void i2c_transfer_trace_unreg(void);
@@ -144,228 +144,6 @@ TRACE_EVENT_FN(i2c_result,
 	       i2c_transfer_trace_reg,
 	       i2c_transfer_trace_unreg);
 
-/*
- * i2c_smbus_xfer() write data or procedure call request
- */
-TRACE_EVENT_CONDITION(smbus_write,
-	TP_PROTO(const struct i2c_adapter *adap,
-		 u16 addr, unsigned short flags,
-		 char read_write, u8 command, int protocol,
-		 const union i2c_smbus_data *data),
-	TP_ARGS(adap, addr, flags, read_write, command, protocol, data),
-	TP_CONDITION(read_write == I2C_SMBUS_WRITE ||
-		     protocol == I2C_SMBUS_PROC_CALL ||
-		     protocol == I2C_SMBUS_BLOCK_PROC_CALL),
-	TP_STRUCT__entry(
-		__field(int,	adapter_nr		)
-		__field(__u16,	addr			)
-		__field(__u16,	flags			)
-		__field(__u8,	command			)
-		__field(__u8,	len			)
-		__field(__u32,	protocol		)
-		__array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2)	),
-	TP_fast_assign(
-		__entry->adapter_nr = adap->nr;
-		__entry->addr = addr;
-		__entry->flags = flags;
-		__entry->command = command;
-		__entry->protocol = protocol;
-
-		switch (protocol) {
-		case I2C_SMBUS_BYTE_DATA:
-			__entry->len = 1;
-			goto copy;
-		case I2C_SMBUS_WORD_DATA:
-		case I2C_SMBUS_PROC_CALL:
-			__entry->len = 2;
-			goto copy;
-		case I2C_SMBUS_BLOCK_DATA:
-		case I2C_SMBUS_BLOCK_PROC_CALL:
-		case I2C_SMBUS_I2C_BLOCK_DATA:
-			__entry->len = data->block[0] + 1;
-		copy:
-			memcpy(__entry->buf, data->block, __entry->len);
-			break;
-		case I2C_SMBUS_QUICK:
-		case I2C_SMBUS_BYTE:
-		case I2C_SMBUS_I2C_BLOCK_BROKEN:
-		default:
-			__entry->len = 0;
-		}
-		       ),
-	TP_printk("i2c-%d a=%03x f=%04x c=%x %s l=%u [%*phD]",
-		  __entry->adapter_nr,
-		  __entry->addr,
-		  __entry->flags,
-		  __entry->command,
-		  __print_symbolic(__entry->protocol,
-				   { I2C_SMBUS_QUICK,		"QUICK"	},
-				   { I2C_SMBUS_BYTE,		"BYTE"	},
-				   { I2C_SMBUS_BYTE_DATA,		"BYTE_DATA" },
-				   { I2C_SMBUS_WORD_DATA,		"WORD_DATA" },
-				   { I2C_SMBUS_PROC_CALL,		"PROC_CALL" },
-				   { I2C_SMBUS_BLOCK_DATA,		"BLOCK_DATA" },
-				   { I2C_SMBUS_I2C_BLOCK_BROKEN,	"I2C_BLOCK_BROKEN" },
-				   { I2C_SMBUS_BLOCK_PROC_CALL,	"BLOCK_PROC_CALL" },
-				   { I2C_SMBUS_I2C_BLOCK_DATA,	"I2C_BLOCK_DATA" }),
-		  __entry->len,
-		  __entry->len, __entry->buf
-		  ));
-
-/*
- * i2c_smbus_xfer() read data request
- */
-TRACE_EVENT_CONDITION(smbus_read,
-	TP_PROTO(const struct i2c_adapter *adap,
-		 u16 addr, unsigned short flags,
-		 char read_write, u8 command, int protocol),
-	TP_ARGS(adap, addr, flags, read_write, command, protocol),
-	TP_CONDITION(!(read_write == I2C_SMBUS_WRITE ||
-		       protocol == I2C_SMBUS_PROC_CALL ||
-		       protocol == I2C_SMBUS_BLOCK_PROC_CALL)),
-	TP_STRUCT__entry(
-		__field(int,	adapter_nr		)
-		__field(__u16,	flags			)
-		__field(__u16,	addr			)
-		__field(__u8,	command			)
-		__field(__u32,	protocol		)
-		__array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2)	),
-	TP_fast_assign(
-		__entry->adapter_nr = adap->nr;
-		__entry->addr = addr;
-		__entry->flags = flags;
-		__entry->command = command;
-		__entry->protocol = protocol;
-		       ),
-	TP_printk("i2c-%d a=%03x f=%04x c=%x %s",
-		  __entry->adapter_nr,
-		  __entry->addr,
-		  __entry->flags,
-		  __entry->command,
-		  __print_symbolic(__entry->protocol,
-				   { I2C_SMBUS_QUICK,		"QUICK"	},
-				   { I2C_SMBUS_BYTE,		"BYTE"	},
-				   { I2C_SMBUS_BYTE_DATA,		"BYTE_DATA" },
-				   { I2C_SMBUS_WORD_DATA,		"WORD_DATA" },
-				   { I2C_SMBUS_PROC_CALL,		"PROC_CALL" },
-				   { I2C_SMBUS_BLOCK_DATA,		"BLOCK_DATA" },
-				   { I2C_SMBUS_I2C_BLOCK_BROKEN,	"I2C_BLOCK_BROKEN" },
-				   { I2C_SMBUS_BLOCK_PROC_CALL,	"BLOCK_PROC_CALL" },
-				   { I2C_SMBUS_I2C_BLOCK_DATA,	"I2C_BLOCK_DATA" })
-		  ));
-
-/*
- * i2c_smbus_xfer() read data or procedure call reply
- */
-TRACE_EVENT_CONDITION(smbus_reply,
-	TP_PROTO(const struct i2c_adapter *adap,
-		 u16 addr, unsigned short flags,
-		 char read_write, u8 command, int protocol,
-		 const union i2c_smbus_data *data),
-	TP_ARGS(adap, addr, flags, read_write, command, protocol, data),
-	TP_CONDITION(read_write == I2C_SMBUS_READ),
-	TP_STRUCT__entry(
-		__field(int,	adapter_nr		)
-		__field(__u16,	addr			)
-		__field(__u16,	flags			)
-		__field(__u8,	command			)
-		__field(__u8,	len			)
-		__field(__u32,	protocol		)
-		__array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2)	),
-	TP_fast_assign(
-		__entry->adapter_nr = adap->nr;
-		__entry->addr = addr;
-		__entry->flags = flags;
-		__entry->command = command;
-		__entry->protocol = protocol;
-
-		switch (protocol) {
-		case I2C_SMBUS_BYTE:
-		case I2C_SMBUS_BYTE_DATA:
-			__entry->len = 1;
-			goto copy;
-		case I2C_SMBUS_WORD_DATA:
-		case I2C_SMBUS_PROC_CALL:
-			__entry->len = 2;
-			goto copy;
-		case I2C_SMBUS_BLOCK_DATA:
-		case I2C_SMBUS_BLOCK_PROC_CALL:
-		case I2C_SMBUS_I2C_BLOCK_DATA:
-			__entry->len = data->block[0] + 1;
-		copy:
-			memcpy(__entry->buf, data->block, __entry->len);
-			break;
-		case I2C_SMBUS_QUICK:
-		case I2C_SMBUS_I2C_BLOCK_BROKEN:
-		default:
-			__entry->len = 0;
-		}
-		       ),
-	TP_printk("i2c-%d a=%03x f=%04x c=%x %s l=%u [%*phD]",
-		  __entry->adapter_nr,
-		  __entry->addr,
-		  __entry->flags,
-		  __entry->command,
-		  __print_symbolic(__entry->protocol,
-				   { I2C_SMBUS_QUICK,		"QUICK"	},
-				   { I2C_SMBUS_BYTE,		"BYTE"	},
-				   { I2C_SMBUS_BYTE_DATA,		"BYTE_DATA" },
-				   { I2C_SMBUS_WORD_DATA,		"WORD_DATA" },
-				   { I2C_SMBUS_PROC_CALL,		"PROC_CALL" },
-				   { I2C_SMBUS_BLOCK_DATA,		"BLOCK_DATA" },
-				   { I2C_SMBUS_I2C_BLOCK_BROKEN,	"I2C_BLOCK_BROKEN" },
-				   { I2C_SMBUS_BLOCK_PROC_CALL,	"BLOCK_PROC_CALL" },
-				   { I2C_SMBUS_I2C_BLOCK_DATA,	"I2C_BLOCK_DATA" }),
-		  __entry->len,
-		  __entry->len, __entry->buf
-		  ));
-
-/*
- * i2c_smbus_xfer() result
- */
-TRACE_EVENT(smbus_result,
-	    TP_PROTO(const struct i2c_adapter *adap,
-		     u16 addr, unsigned short flags,
-		     char read_write, u8 command, int protocol,
-		     int res),
-	    TP_ARGS(adap, addr, flags, read_write, command, protocol, res),
-	    TP_STRUCT__entry(
-		    __field(int,	adapter_nr		)
-		    __field(__u16,	addr			)
-		    __field(__u16,	flags			)
-		    __field(__u8,	read_write		)
-		    __field(__u8,	command			)
-		    __field(__s16,	res			)
-		    __field(__u32,	protocol		)
-			     ),
-	    TP_fast_assign(
-		    __entry->adapter_nr = adap->nr;
-		    __entry->addr = addr;
-		    __entry->flags = flags;
-		    __entry->read_write = read_write;
-		    __entry->command = command;
-		    __entry->protocol = protocol;
-		    __entry->res = res;
-			   ),
-	    TP_printk("i2c-%d a=%03x f=%04x c=%x %s %s res=%d",
-		      __entry->adapter_nr,
-		      __entry->addr,
-		      __entry->flags,
-		      __entry->command,
-		      __print_symbolic(__entry->protocol,
-				       { I2C_SMBUS_QUICK,		"QUICK"	},
-				       { I2C_SMBUS_BYTE,		"BYTE"	},
-				       { I2C_SMBUS_BYTE_DATA,		"BYTE_DATA" },
-				       { I2C_SMBUS_WORD_DATA,		"WORD_DATA" },
-				       { I2C_SMBUS_PROC_CALL,		"PROC_CALL" },
-				       { I2C_SMBUS_BLOCK_DATA,		"BLOCK_DATA" },
-				       { I2C_SMBUS_I2C_BLOCK_BROKEN,	"I2C_BLOCK_BROKEN" },
-				       { I2C_SMBUS_BLOCK_PROC_CALL,	"BLOCK_PROC_CALL" },
-				       { I2C_SMBUS_I2C_BLOCK_DATA,	"I2C_BLOCK_DATA" }),
-		      __entry->read_write == I2C_SMBUS_WRITE ? "wr" : "rd",
-		      __entry->res
-		      ));
-
 #endif /* _TRACE_I2C_H */
 
 /* This part must be outside protection */

diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 304ff94..8e50d01 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h

@@ -34,7 +34,7 @@
 	{(unsigned long)__GFP_FS,		"__GFP_FS"},		\
 	{(unsigned long)__GFP_COLD,		"__GFP_COLD"},		\
 	{(unsigned long)__GFP_NOWARN,		"__GFP_NOWARN"},	\
-	{(unsigned long)__GFP_REPEAT,		"__GFP_REPEAT"},	\
+	{(unsigned long)__GFP_RETRY_MAYFAIL,	"__GFP_RETRY_MAYFAIL"},	\
 	{(unsigned long)__GFP_NOFAIL,		"__GFP_NOFAIL"},	\
 	{(unsigned long)__GFP_NORETRY,		"__GFP_NORETRY"},	\
 	{(unsigned long)__GFP_COMP,		"__GFP_COMP"},		\
@@ -257,7 +257,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY,	"softdirty"	)		\
 
 COMPACTION_STATUS
 COMPACTION_PRIORITY
-COMPACTION_FEEDBACK
+/* COMPACTION_FEEDBACK are defines not enums. Not needed here. */
 ZONE_TYPE
 LRU_NAMES
 

diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h
index 38baeb2..c3c19d4 100644
--- a/include/trace/events/oom.h
+++ b/include/trace/events/oom.h

@@ -70,6 +70,86 @@ TRACE_EVENT(reclaim_retry_zone,
 			__entry->wmark_check)
 );
 
+TRACE_EVENT(mark_victim,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(wake_reaper,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(start_task_reaping,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(finish_task_reaping,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
+TRACE_EVENT(skip_task_reaping,
+	TP_PROTO(int pid),
+
+	TP_ARGS(pid),
+
+	TP_STRUCT__entry(
+		__field(int, pid)
+	),
+
+	TP_fast_assign(
+		__entry->pid = pid;
+	),
+
+	TP_printk("pid=%d", __entry->pid)
+);
+
 #ifdef CONFIG_COMPACTION
 TRACE_EVENT(compact_retry,
 

diff --git a/include/trace/events/smbus.h b/include/trace/events/smbus.h
new file mode 100644
index 0000000..d2fb6e1
--- /dev/null
+++ b/include/trace/events/smbus.h

@@ -0,0 +1,249 @@
+/* SMBUS message transfer tracepoints
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM smbus
+
+#if !defined(_TRACE_SMBUS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SMBUS_H
+
+#include <linux/i2c.h>
+#include <linux/tracepoint.h>
+
+/*
+ * drivers/i2c/i2c-core-smbus.c
+ */
+
+/*
+ * i2c_smbus_xfer() write data or procedure call request
+ */
+TRACE_EVENT_CONDITION(smbus_write,
+	TP_PROTO(const struct i2c_adapter *adap,
+		 u16 addr, unsigned short flags,
+		 char read_write, u8 command, int protocol,
+		 const union i2c_smbus_data *data),
+	TP_ARGS(adap, addr, flags, read_write, command, protocol, data),
+	TP_CONDITION(read_write == I2C_SMBUS_WRITE ||
+		     protocol == I2C_SMBUS_PROC_CALL ||
+		     protocol == I2C_SMBUS_BLOCK_PROC_CALL),
+	TP_STRUCT__entry(
+		__field(int,	adapter_nr		)
+		__field(__u16,	addr			)
+		__field(__u16,	flags			)
+		__field(__u8,	command			)
+		__field(__u8,	len			)
+		__field(__u32,	protocol		)
+		__array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2)	),
+	TP_fast_assign(
+		__entry->adapter_nr = adap->nr;
+		__entry->addr = addr;
+		__entry->flags = flags;
+		__entry->command = command;
+		__entry->protocol = protocol;
+
+		switch (protocol) {
+		case I2C_SMBUS_BYTE_DATA:
+			__entry->len = 1;
+			goto copy;
+		case I2C_SMBUS_WORD_DATA:
+		case I2C_SMBUS_PROC_CALL:
+			__entry->len = 2;
+			goto copy;
+		case I2C_SMBUS_BLOCK_DATA:
+		case I2C_SMBUS_BLOCK_PROC_CALL:
+		case I2C_SMBUS_I2C_BLOCK_DATA:
+			__entry->len = data->block[0] + 1;
+		copy:
+			memcpy(__entry->buf, data->block, __entry->len);
+			break;
+		case I2C_SMBUS_QUICK:
+		case I2C_SMBUS_BYTE:
+		case I2C_SMBUS_I2C_BLOCK_BROKEN:
+		default:
+			__entry->len = 0;
+		}
+		       ),
+	TP_printk("i2c-%d a=%03x f=%04x c=%x %s l=%u [%*phD]",
+		  __entry->adapter_nr,
+		  __entry->addr,
+		  __entry->flags,
+		  __entry->command,
+		  __print_symbolic(__entry->protocol,
+				   { I2C_SMBUS_QUICK,		"QUICK"	},
+				   { I2C_SMBUS_BYTE,		"BYTE"	},
+				   { I2C_SMBUS_BYTE_DATA,		"BYTE_DATA" },
+				   { I2C_SMBUS_WORD_DATA,		"WORD_DATA" },
+				   { I2C_SMBUS_PROC_CALL,		"PROC_CALL" },
+				   { I2C_SMBUS_BLOCK_DATA,		"BLOCK_DATA" },
+				   { I2C_SMBUS_I2C_BLOCK_BROKEN,	"I2C_BLOCK_BROKEN" },
+				   { I2C_SMBUS_BLOCK_PROC_CALL,	"BLOCK_PROC_CALL" },
+				   { I2C_SMBUS_I2C_BLOCK_DATA,	"I2C_BLOCK_DATA" }),
+		  __entry->len,
+		  __entry->len, __entry->buf
+		  ));
+
+/*
+ * i2c_smbus_xfer() read data request
+ */
+TRACE_EVENT_CONDITION(smbus_read,
+	TP_PROTO(const struct i2c_adapter *adap,
+		 u16 addr, unsigned short flags,
+		 char read_write, u8 command, int protocol),
+	TP_ARGS(adap, addr, flags, read_write, command, protocol),
+	TP_CONDITION(!(read_write == I2C_SMBUS_WRITE ||
+		       protocol == I2C_SMBUS_PROC_CALL ||
+		       protocol == I2C_SMBUS_BLOCK_PROC_CALL)),
+	TP_STRUCT__entry(
+		__field(int,	adapter_nr		)
+		__field(__u16,	flags			)
+		__field(__u16,	addr			)
+		__field(__u8,	command			)
+		__field(__u32,	protocol		)
+		__array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2)	),
+	TP_fast_assign(
+		__entry->adapter_nr = adap->nr;
+		__entry->addr = addr;
+		__entry->flags = flags;
+		__entry->command = command;
+		__entry->protocol = protocol;
+		       ),
+	TP_printk("i2c-%d a=%03x f=%04x c=%x %s",
+		  __entry->adapter_nr,
+		  __entry->addr,
+		  __entry->flags,
+		  __entry->command,
+		  __print_symbolic(__entry->protocol,
+				   { I2C_SMBUS_QUICK,		"QUICK"	},
+				   { I2C_SMBUS_BYTE,		"BYTE"	},
+				   { I2C_SMBUS_BYTE_DATA,		"BYTE_DATA" },
+				   { I2C_SMBUS_WORD_DATA,		"WORD_DATA" },
+				   { I2C_SMBUS_PROC_CALL,		"PROC_CALL" },
+				   { I2C_SMBUS_BLOCK_DATA,		"BLOCK_DATA" },
+				   { I2C_SMBUS_I2C_BLOCK_BROKEN,	"I2C_BLOCK_BROKEN" },
+				   { I2C_SMBUS_BLOCK_PROC_CALL,	"BLOCK_PROC_CALL" },
+				   { I2C_SMBUS_I2C_BLOCK_DATA,	"I2C_BLOCK_DATA" })
+		  ));
+
+/*
+ * i2c_smbus_xfer() read data or procedure call reply
+ */
+TRACE_EVENT_CONDITION(smbus_reply,
+	TP_PROTO(const struct i2c_adapter *adap,
+		 u16 addr, unsigned short flags,
+		 char read_write, u8 command, int protocol,
+		 const union i2c_smbus_data *data),
+	TP_ARGS(adap, addr, flags, read_write, command, protocol, data),
+	TP_CONDITION(read_write == I2C_SMBUS_READ),
+	TP_STRUCT__entry(
+		__field(int,	adapter_nr		)
+		__field(__u16,	addr			)
+		__field(__u16,	flags			)
+		__field(__u8,	command			)
+		__field(__u8,	len			)
+		__field(__u32,	protocol		)
+		__array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2)	),
+	TP_fast_assign(
+		__entry->adapter_nr = adap->nr;
+		__entry->addr = addr;
+		__entry->flags = flags;
+		__entry->command = command;
+		__entry->protocol = protocol;
+
+		switch (protocol) {
+		case I2C_SMBUS_BYTE:
+		case I2C_SMBUS_BYTE_DATA:
+			__entry->len = 1;
+			goto copy;
+		case I2C_SMBUS_WORD_DATA:
+		case I2C_SMBUS_PROC_CALL:
+			__entry->len = 2;
+			goto copy;
+		case I2C_SMBUS_BLOCK_DATA:
+		case I2C_SMBUS_BLOCK_PROC_CALL:
+		case I2C_SMBUS_I2C_BLOCK_DATA:
+			__entry->len = data->block[0] + 1;
+		copy:
+			memcpy(__entry->buf, data->block, __entry->len);
+			break;
+		case I2C_SMBUS_QUICK:
+		case I2C_SMBUS_I2C_BLOCK_BROKEN:
+		default:
+			__entry->len = 0;
+		}
+		       ),
+	TP_printk("i2c-%d a=%03x f=%04x c=%x %s l=%u [%*phD]",
+		  __entry->adapter_nr,
+		  __entry->addr,
+		  __entry->flags,
+		  __entry->command,
+		  __print_symbolic(__entry->protocol,
+				   { I2C_SMBUS_QUICK,		"QUICK"	},
+				   { I2C_SMBUS_BYTE,		"BYTE"	},
+				   { I2C_SMBUS_BYTE_DATA,		"BYTE_DATA" },
+				   { I2C_SMBUS_WORD_DATA,		"WORD_DATA" },
+				   { I2C_SMBUS_PROC_CALL,		"PROC_CALL" },
+				   { I2C_SMBUS_BLOCK_DATA,		"BLOCK_DATA" },
+				   { I2C_SMBUS_I2C_BLOCK_BROKEN,	"I2C_BLOCK_BROKEN" },
+				   { I2C_SMBUS_BLOCK_PROC_CALL,	"BLOCK_PROC_CALL" },
+				   { I2C_SMBUS_I2C_BLOCK_DATA,	"I2C_BLOCK_DATA" }),
+		  __entry->len,
+		  __entry->len, __entry->buf
+		  ));
+
+/*
+ * i2c_smbus_xfer() result
+ */
+TRACE_EVENT(smbus_result,
+	    TP_PROTO(const struct i2c_adapter *adap,
+		     u16 addr, unsigned short flags,
+		     char read_write, u8 command, int protocol,
+		     int res),
+	    TP_ARGS(adap, addr, flags, read_write, command, protocol, res),
+	    TP_STRUCT__entry(
+		    __field(int,	adapter_nr		)
+		    __field(__u16,	addr			)
+		    __field(__u16,	flags			)
+		    __field(__u8,	read_write		)
+		    __field(__u8,	command			)
+		    __field(__s16,	res			)
+		    __field(__u32,	protocol		)
+			     ),
+	    TP_fast_assign(
+		    __entry->adapter_nr = adap->nr;
+		    __entry->addr = addr;
+		    __entry->flags = flags;
+		    __entry->read_write = read_write;
+		    __entry->command = command;
+		    __entry->protocol = protocol;
+		    __entry->res = res;
+			   ),
+	    TP_printk("i2c-%d a=%03x f=%04x c=%x %s %s res=%d",
+		      __entry->adapter_nr,
+		      __entry->addr,
+		      __entry->flags,
+		      __entry->command,
+		      __print_symbolic(__entry->protocol,
+				       { I2C_SMBUS_QUICK,		"QUICK"	},
+				       { I2C_SMBUS_BYTE,		"BYTE"	},
+				       { I2C_SMBUS_BYTE_DATA,		"BYTE_DATA" },
+				       { I2C_SMBUS_WORD_DATA,		"WORD_DATA" },
+				       { I2C_SMBUS_PROC_CALL,		"PROC_CALL" },
+				       { I2C_SMBUS_BLOCK_DATA,		"BLOCK_DATA" },
+				       { I2C_SMBUS_I2C_BLOCK_BROKEN,	"I2C_BLOCK_BROKEN" },
+				       { I2C_SMBUS_BLOCK_PROC_CALL,	"BLOCK_PROC_CALL" },
+				       { I2C_SMBUS_I2C_BLOCK_DATA,	"I2C_BLOCK_DATA" }),
+		      __entry->read_write == I2C_SMBUS_WRITE ? "wr" : "rd",
+		      __entry->res
+		      ));
+
+#endif /* _TRACE_SMBUS_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>

diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h
index 1953f8d..aa63451 100644
--- a/include/uapi/linux/auto_fs.h
+++ b/include/uapi/linux/auto_fs.h

@@ -26,7 +26,7 @@
 #define AUTOFS_MIN_PROTO_VERSION	AUTOFS_PROTO_VERSION
 
 /*
- * The wait_queue_entry_token (autofs_wqt_t) is part of a structure which is passed
+ * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed
  * back to the kernel via ioctl from userspace. On architectures where 32- and
  * 64-bit userspace binaries can be executed it's important that the size of
  * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we
@@ -49,7 +49,7 @@ struct autofs_packet_hdr {
 
 struct autofs_packet_missing {
 	struct autofs_packet_hdr hdr;
-	autofs_wqt_t wait_queue_entry_token;
+	autofs_wqt_t wait_queue_token;
 	int len;
 	char name[NAME_MAX+1];
 };	

diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h
index 65b72d02..7c6da42 100644
--- a/include/uapi/linux/auto_fs4.h
+++ b/include/uapi/linux/auto_fs4.h

@@ -108,7 +108,7 @@ enum autofs_notify {
 /* v4 multi expire (via pipe) */
 struct autofs_packet_expire_multi {
 	struct autofs_packet_hdr hdr;
-	autofs_wqt_t wait_queue_entry_token;
+	autofs_wqt_t wait_queue_token;
 	int len;
 	char name[NAME_MAX+1];
 };
@@ -123,7 +123,7 @@ union autofs_packet_union {
 /* autofs v5 common packet struct */
 struct autofs_v5_packet {
 	struct autofs_packet_hdr hdr;
-	autofs_wqt_t wait_queue_entry_token;
+	autofs_wqt_t wait_queue_token;
 	__u32 dev;
 	__u64 ino;
 	__u32 uid;

diff --git a/include/uapi/linux/kcmp.h b/include/uapi/linux/kcmp.h
index 84df14b..481e103 100644
--- a/include/uapi/linux/kcmp.h
+++ b/include/uapi/linux/kcmp.h

@@ -1,6 +1,8 @@
 #ifndef _UAPI_LINUX_KCMP_H
 #define _UAPI_LINUX_KCMP_H
 
+#include <linux/types.h>
+
 /* Comparison type */
 enum kcmp_type {
 	KCMP_FILE,
@@ -10,8 +12,16 @@ enum kcmp_type {
 	KCMP_SIGHAND,
 	KCMP_IO,
 	KCMP_SYSVSEM,
+	KCMP_EPOLL_TFD,
 
 	KCMP_TYPES,
 };
 
+/* Slot for KCMP_EPOLL_TFD */
+struct kcmp_epoll_slot {
+	__u32 efd;		/* epoll file descriptor */
+	__u32 tfd;		/* target file number */
+	__u32 toff;		/* target offset within same numbered sequence */
+};
+
 #endif /* _UAPI_LINUX_KCMP_H */

diff --git a/include/uapi/linux/sem.h b/include/uapi/linux/sem.h
index dd73b90..67eb903 100644
--- a/include/uapi/linux/sem.h
+++ b/include/uapi/linux/sem.h

@@ -23,7 +23,7 @@
 struct semid_ds {
 	struct ipc_perm	sem_perm;		/* permissions .. see ipc.h */
 	__kernel_time_t	sem_otime;		/* last semop time */
-	__kernel_time_t	sem_ctime;		/* last change time */
+	__kernel_time_t	sem_ctime;		/* create/last semctl() time */
 	struct sem	*sem_base;		/* ptr to first semaphore in array */
 	struct sem_queue *sem_pending;		/* pending operations to be processed */
 	struct sem_queue **sem_pending_last;	/* last pending operation */

diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
index af17b41..24a1c4e 100644
--- a/include/uapi/linux/target_core_user.h
+++ b/include/uapi/linux/target_core_user.h

@@ -130,6 +130,11 @@ enum tcmu_genl_cmd {
 	TCMU_CMD_UNSPEC,
 	TCMU_CMD_ADDED_DEVICE,
 	TCMU_CMD_REMOVED_DEVICE,
+	TCMU_CMD_RECONFIG_DEVICE,
+	TCMU_CMD_ADDED_DEVICE_DONE,
+	TCMU_CMD_REMOVED_DEVICE_DONE,
+	TCMU_CMD_RECONFIG_DEVICE_DONE,
+	TCMU_CMD_SET_FEATURES,
 	__TCMU_CMD_MAX,
 };
 #define TCMU_CMD_MAX (__TCMU_CMD_MAX - 1)
@@ -138,6 +143,13 @@ enum tcmu_genl_attr {
 	TCMU_ATTR_UNSPEC,
 	TCMU_ATTR_DEVICE,
 	TCMU_ATTR_MINOR,
+	TCMU_ATTR_PAD,
+	TCMU_ATTR_DEV_CFG,
+	TCMU_ATTR_DEV_SIZE,
+	TCMU_ATTR_WRITECACHE,
+	TCMU_ATTR_CMD_STATUS,
+	TCMU_ATTR_DEVICE_ID,
+	TCMU_ATTR_SUPP_KERN_CMD_REPLY,
 	__TCMU_ATTR_MAX,
 };
 #define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1)

diff --git a/init/main.c b/init/main.c
index df58a41..052481f 100644
--- a/init/main.c
+++ b/init/main.c

@@ -518,6 +518,7 @@ asmlinkage __visible void __init start_kernel(void)
 	/*
 	 * Set up the initial canary ASAP:
 	 */
+	add_latent_entropy();
 	boot_init_stack_canary();
 
 	cgroup_init_early();

diff --git a/ipc/msg.c b/ipc/msg.c
index 104926d..5b25e07 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c

@@ -97,11 +97,11 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
 
 static void msg_rcu_free(struct rcu_head *head)
 {
-	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
-	struct msg_queue *msq = ipc_rcu_to_struct(p);
+	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
+	struct msg_queue *msq = container_of(p, struct msg_queue, q_perm);
 
 	security_msg_queue_free(msq);
-	ipc_rcu_free(head);
+	kvfree(msq);
 }
 
 /**
@@ -114,12 +114,12 @@ static void msg_rcu_free(struct rcu_head *head)
 static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 {
 	struct msg_queue *msq;
-	int id, retval;
+	int retval;
 	key_t key = params->key;
 	int msgflg = params->flg;
 
-	msq = ipc_rcu_alloc(sizeof(*msq));
-	if (!msq)
+	msq = kvmalloc(sizeof(*msq), GFP_KERNEL);
+	if (unlikely(!msq))
 		return -ENOMEM;
 
 	msq->q_perm.mode = msgflg & S_IRWXUGO;
@@ -128,7 +128,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	msq->q_perm.security = NULL;
 	retval = security_msg_queue_alloc(msq);
 	if (retval) {
-		ipc_rcu_putref(msq, ipc_rcu_free);
+		kvfree(msq);
 		return retval;
 	}
 
@@ -142,10 +142,10 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	INIT_LIST_HEAD(&msq->q_senders);
 
 	/* ipc_addid() locks msq upon success. */
-	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
-	if (id < 0) {
-		ipc_rcu_putref(msq, msg_rcu_free);
-		return id;
+	retval = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
+	if (retval < 0) {
+		call_rcu(&msq->q_perm.rcu, msg_rcu_free);
+		return retval;
 	}
 
 	ipc_unlock_object(&msq->q_perm);
@@ -249,7 +249,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 		free_msg(msg);
 	}
 	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
-	ipc_rcu_putref(msq, msg_rcu_free);
+	ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
 }
 
 /*
@@ -688,7 +688,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		/* enqueue the sender and prepare to block */
 		ss_add(msq, &s, msgsz);
 
-		if (!ipc_rcu_getref(msq)) {
+		if (!ipc_rcu_getref(&msq->q_perm)) {
 			err = -EIDRM;
 			goto out_unlock0;
 		}
@@ -700,7 +700,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		rcu_read_lock();
 		ipc_lock_object(&msq->q_perm);
 
-		ipc_rcu_putref(msq, msg_rcu_free);
+		ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
 		/* raced with RMID? */
 		if (!ipc_valid_object(&msq->q_perm)) {
 			err = -EIDRM;

diff --git a/ipc/sem.c b/ipc/sem.c
index 947dc23..9e70cd7 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c

@@ -87,24 +87,6 @@
 #include <linux/uaccess.h>
 #include "util.h"
 
-/* One semaphore structure for each semaphore in the system. */
-struct sem {
-	int	semval;		/* current value */
-	/*
-	 * PID of the process that last modified the semaphore. For
-	 * Linux, specifically these are:
-	 *  - semop
-	 *  - semctl, via SETVAL and SETALL.
-	 *  - at task exit when performing undo adjustments (see exit_sem).
-	 */
-	int	sempid;
-	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
-	struct list_head pending_alter; /* pending single-sop operations */
-					/* that alter the semaphore */
-	struct list_head pending_const; /* pending single-sop operations */
-					/* that do not alter the semaphore*/
-	time_t	sem_otime;	/* candidate for sem_otime */
-} ____cacheline_aligned_in_smp;
 
 /* One queue for each sleeping process in the system. */
 struct sem_queue {
@@ -175,7 +157,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
  *	sem_array.sem_undo
  *
  * b) global or semaphore sem_lock() for read/write:
- *	sem_array.sem_base[i].pending_{const,alter}:
+ *	sem_array.sems[i].pending_{const,alter}:
  *
  * c) special:
  *	sem_undo_list.list_proc:
@@ -250,7 +232,7 @@ static void unmerge_queues(struct sem_array *sma)
 	 */
 	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
 		struct sem *curr;
-		curr = &sma->sem_base[q->sops[0].sem_num];
+		curr = &sma->sems[q->sops[0].sem_num];
 
 		list_add_tail(&q->list, &curr->pending_alter);
 	}
@@ -270,7 +252,7 @@ static void merge_queues(struct sem_array *sma)
 {
 	int i;
 	for (i = 0; i < sma->sem_nsems; i++) {
-		struct sem *sem = sma->sem_base + i;
+		struct sem *sem = &sma->sems[i];
 
 		list_splice_init(&sem->pending_alter, &sma->pending_alter);
 	}
@@ -278,11 +260,11 @@ static void merge_queues(struct sem_array *sma)
 
 static void sem_rcu_free(struct rcu_head *head)
 {
-	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
-	struct sem_array *sma = ipc_rcu_to_struct(p);
+	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
+	struct sem_array *sma = container_of(p, struct sem_array, sem_perm);
 
 	security_sem_free(sma);
-	ipc_rcu_free(head);
+	kvfree(sma);
 }
 
 /*
@@ -306,7 +288,7 @@ static void complexmode_enter(struct sem_array *sma)
 	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
 
 	for (i = 0; i < sma->sem_nsems; i++) {
-		sem = sma->sem_base + i;
+		sem = &sma->sems[i];
 		spin_lock(&sem->lock);
 		spin_unlock(&sem->lock);
 	}
@@ -366,7 +348,7 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 	 *
 	 * Both facts are tracked by use_global_mode.
 	 */
-	sem = sma->sem_base + sops->sem_num;
+	sem = &sma->sems[sops->sem_num];
 
 	/*
 	 * Initial check for use_global_lock. Just an optimization,
@@ -421,7 +403,7 @@ static inline void sem_unlock(struct sem_array *sma, int locknum)
 		complexmode_tryleave(sma);
 		ipc_unlock_object(&sma->sem_perm);
 	} else {
-		struct sem *sem = sma->sem_base + locknum;
+		struct sem *sem = &sma->sems[locknum];
 		spin_unlock(&sem->lock);
 	}
 }
@@ -456,7 +438,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
 static inline void sem_lock_and_putref(struct sem_array *sma)
 {
 	sem_lock(sma, NULL, -1);
-	ipc_rcu_putref(sma, sem_rcu_free);
+	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 }
 
 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -464,6 +446,24 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
 	ipc_rmid(&sem_ids(ns), &s->sem_perm);
 }
 
+static struct sem_array *sem_alloc(size_t nsems)
+{
+	struct sem_array *sma;
+	size_t size;
+	/* Allocate a zeroed sem_array with room for nsems trailing sems[] entries. */
+	if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
+		return NULL;	/* header plus array would exceed INT_MAX bytes */
+
+	size = sizeof(*sma) + nsems * sizeof(sma->sems[0]);
+	sma = kvmalloc(size, GFP_KERNEL);
+	if (unlikely(!sma))
+		return NULL;	/* allocation failed */
+
+	memset(sma, 0, size);	/* clears the header and every sems[] entry */
+
+	return sma;
+}
+
 /**
  * newary - Create a new semaphore set
  * @ns: namespace
@@ -473,10 +473,8 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
  */
 static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 {
-	int id;
 	int retval;
 	struct sem_array *sma;
-	int size;
 	key_t key = params->key;
 	int nsems = params->u.nsems;
 	int semflg = params->flg;
@@ -487,29 +485,24 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	if (ns->used_sems + nsems > ns->sc_semmns)
 		return -ENOSPC;
 
-	size = sizeof(*sma) + nsems * sizeof(struct sem);
-	sma = ipc_rcu_alloc(size);
+	sma = sem_alloc(nsems);
 	if (!sma)
 		return -ENOMEM;
 
-	memset(sma, 0, size);
-
 	sma->sem_perm.mode = (semflg & S_IRWXUGO);
 	sma->sem_perm.key = key;
 
 	sma->sem_perm.security = NULL;
 	retval = security_sem_alloc(sma);
 	if (retval) {
-		ipc_rcu_putref(sma, ipc_rcu_free);
+		kvfree(sma);
 		return retval;
 	}
 
-	sma->sem_base = (struct sem *) &sma[1];
-
 	for (i = 0; i < nsems; i++) {
-		INIT_LIST_HEAD(&sma->sem_base[i].pending_alter);
-		INIT_LIST_HEAD(&sma->sem_base[i].pending_const);
-		spin_lock_init(&sma->sem_base[i].lock);
+		INIT_LIST_HEAD(&sma->sems[i].pending_alter);
+		INIT_LIST_HEAD(&sma->sems[i].pending_const);
+		spin_lock_init(&sma->sems[i].lock);
 	}
 
 	sma->complex_count = 0;
@@ -520,10 +513,10 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	sma->sem_nsems = nsems;
 	sma->sem_ctime = get_seconds();
 
-	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
-	if (id < 0) {
-		ipc_rcu_putref(sma, sem_rcu_free);
-		return id;
+	retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
+	if (retval < 0) {
+		call_rcu(&sma->sem_perm.rcu, sem_rcu_free);
+		return retval;
 	}
 	ns->used_sems += nsems;
 
@@ -612,7 +605,7 @@ static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
 	un = q->undo;
 
 	for (sop = sops; sop < sops + nsops; sop++) {
-		curr = sma->sem_base + sop->sem_num;
+		curr = &sma->sems[sop->sem_num];
 		sem_op = sop->sem_op;
 		result = curr->semval;
 
@@ -639,7 +632,7 @@ static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
 	sop--;
 	pid = q->pid;
 	while (sop >= sops) {
-		sma->sem_base[sop->sem_num].sempid = pid;
+		sma->sems[sop->sem_num].sempid = pid;
 		sop--;
 	}
 
@@ -661,7 +654,7 @@ static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
 	sop--;
 	while (sop >= sops) {
 		sem_op = sop->sem_op;
-		sma->sem_base[sop->sem_num].semval -= sem_op;
+		sma->sems[sop->sem_num].semval -= sem_op;
 		if (sop->sem_flg & SEM_UNDO)
 			un->semadj[sop->sem_num] += sem_op;
 		sop--;
@@ -692,7 +685,7 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
 	 * until the operations can go through.
 	 */
 	for (sop = sops; sop < sops + nsops; sop++) {
-		curr = sma->sem_base + sop->sem_num;
+		curr = &sma->sems[sop->sem_num];
 		sem_op = sop->sem_op;
 		result = curr->semval;
 
@@ -716,7 +709,7 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
 	}
 
 	for (sop = sops; sop < sops + nsops; sop++) {
-		curr = sma->sem_base + sop->sem_num;
+		curr = &sma->sems[sop->sem_num];
 		sem_op = sop->sem_op;
 		result = curr->semval;
 
@@ -815,7 +808,7 @@ static int wake_const_ops(struct sem_array *sma, int semnum,
 	if (semnum == -1)
 		pending_list = &sma->pending_const;
 	else
-		pending_list = &sma->sem_base[semnum].pending_const;
+		pending_list = &sma->sems[semnum].pending_const;
 
 	list_for_each_entry_safe(q, tmp, pending_list, list) {
 		int error = perform_atomic_semop(sma, q);
@@ -856,7 +849,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
 		for (i = 0; i < nsops; i++) {
 			int num = sops[i].sem_num;
 
-			if (sma->sem_base[num].semval == 0) {
+			if (sma->sems[num].semval == 0) {
 				got_zero = 1;
 				semop_completed |= wake_const_ops(sma, num, wake_q);
 			}
@@ -867,7 +860,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
 		 * Assume all were changed.
 		 */
 		for (i = 0; i < sma->sem_nsems; i++) {
-			if (sma->sem_base[i].semval == 0) {
+			if (sma->sems[i].semval == 0) {
 				got_zero = 1;
 				semop_completed |= wake_const_ops(sma, i, wake_q);
 			}
@@ -909,7 +902,7 @@ static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *w
 	if (semnum == -1)
 		pending_list = &sma->pending_alter;
 	else
-		pending_list = &sma->sem_base[semnum].pending_alter;
+		pending_list = &sma->sems[semnum].pending_alter;
 
 again:
 	list_for_each_entry_safe(q, tmp, pending_list, list) {
@@ -922,7 +915,7 @@ static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *w
 		 * be in the  per semaphore pending queue, and decrements
 		 * cannot be successful if the value is already 0.
 		 */
-		if (semnum != -1 && sma->sem_base[semnum].semval == 0)
+		if (semnum != -1 && sma->sems[semnum].semval == 0)
 			break;
 
 		error = perform_atomic_semop(sma, q);
@@ -959,9 +952,9 @@ static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *w
 static void set_semotime(struct sem_array *sma, struct sembuf *sops)
 {
 	if (sops == NULL) {
-		sma->sem_base[0].sem_otime = get_seconds();
+		sma->sems[0].sem_otime = get_seconds();
 	} else {
-		sma->sem_base[sops[0].sem_num].sem_otime =
+		sma->sems[sops[0].sem_num].sem_otime =
 							get_seconds();
 	}
 }
@@ -1067,9 +1060,9 @@ static int count_semcnt(struct sem_array *sma, ushort semnum,
 	semcnt = 0;
 	/* First: check the simple operations. They are easy to evaluate */
 	if (count_zero)
-		l = &sma->sem_base[semnum].pending_const;
+		l = &sma->sems[semnum].pending_const;
 	else
-		l = &sma->sem_base[semnum].pending_alter;
+		l = &sma->sems[semnum].pending_alter;
 
 	list_for_each_entry(q, l, list) {
 		/* all task on a per-semaphore list sleep on exactly
@@ -1124,7 +1117,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
 	}
 	for (i = 0; i < sma->sem_nsems; i++) {
-		struct sem *sem = sma->sem_base + i;
+		struct sem *sem = &sma->sems[i];
 		list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
 			unlink_queue(sma, q);
 			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
@@ -1142,7 +1135,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 
 	wake_up_q(&wake_q);
 	ns->used_sems -= sma->sem_nsems;
-	ipc_rcu_putref(sma, sem_rcu_free);
+	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 }
 
 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
@@ -1174,9 +1167,9 @@ static time_t get_semotime(struct sem_array *sma)
 	int i;
 	time_t res;
 
-	res = sma->sem_base[0].sem_otime;
+	res = sma->sems[0].sem_otime;
 	for (i = 1; i < sma->sem_nsems; i++) {
-		time_t to = sma->sem_base[i].sem_otime;
+		time_t to = sma->sems[i].sem_otime;
 
 		if (to > res)
 			res = to;
@@ -1325,7 +1318,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 		return -EIDRM;
 	}
 
-	curr = &sma->sem_base[semnum];
+	curr = &sma->sems[semnum];
 
 	ipc_assert_locked_object(&sma->sem_perm);
 	list_for_each_entry(un, &sma->list_id, list_id)
@@ -1382,15 +1375,16 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 			goto out_unlock;
 		}
 		if (nsems > SEMMSL_FAST) {
-			if (!ipc_rcu_getref(sma)) {
+			if (!ipc_rcu_getref(&sma->sem_perm)) {
 				err = -EIDRM;
 				goto out_unlock;
 			}
 			sem_unlock(sma, -1);
 			rcu_read_unlock();
-			sem_io = ipc_alloc(sizeof(ushort)*nsems);
+			sem_io = kvmalloc_array(nsems, sizeof(ushort),
+						GFP_KERNEL);
 			if (sem_io == NULL) {
-				ipc_rcu_putref(sma, sem_rcu_free);
+				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 				return -ENOMEM;
 			}
 
@@ -1402,7 +1396,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 			}
 		}
 		for (i = 0; i < sma->sem_nsems; i++)
-			sem_io[i] = sma->sem_base[i].semval;
+			sem_io[i] = sma->sems[i].semval;
 		sem_unlock(sma, -1);
 		rcu_read_unlock();
 		err = 0;
@@ -1415,29 +1409,30 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		int i;
 		struct sem_undo *un;
 
-		if (!ipc_rcu_getref(sma)) {
+		if (!ipc_rcu_getref(&sma->sem_perm)) {
 			err = -EIDRM;
 			goto out_rcu_wakeup;
 		}
 		rcu_read_unlock();
 
 		if (nsems > SEMMSL_FAST) {
-			sem_io = ipc_alloc(sizeof(ushort)*nsems);
+			sem_io = kvmalloc_array(nsems, sizeof(ushort),
+						GFP_KERNEL);
 			if (sem_io == NULL) {
-				ipc_rcu_putref(sma, sem_rcu_free);
+				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 				return -ENOMEM;
 			}
 		}
 
 		if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
-			ipc_rcu_putref(sma, sem_rcu_free);
+			ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 			err = -EFAULT;
 			goto out_free;
 		}
 
 		for (i = 0; i < nsems; i++) {
 			if (sem_io[i] > SEMVMX) {
-				ipc_rcu_putref(sma, sem_rcu_free);
+				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 				err = -ERANGE;
 				goto out_free;
 			}
@@ -1450,8 +1445,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		}
 
 		for (i = 0; i < nsems; i++) {
-			sma->sem_base[i].semval = sem_io[i];
-			sma->sem_base[i].sempid = task_tgid_vnr(current);
+			sma->sems[i].semval = sem_io[i];
+			sma->sems[i].sempid = task_tgid_vnr(current);
 		}
 
 		ipc_assert_locked_object(&sma->sem_perm);
@@ -1476,7 +1471,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		err = -EIDRM;
 		goto out_unlock;
 	}
-	curr = &sma->sem_base[semnum];
+	curr = &sma->sems[semnum];
 
 	switch (cmd) {
 	case GETVAL:
@@ -1500,7 +1495,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 	wake_up_q(&wake_q);
 out_free:
 	if (sem_io != fast_sem_io)
-		ipc_free(sem_io);
+		kvfree(sem_io);
 	return err;
 }
 
@@ -1719,7 +1714,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	}
 
 	nsems = sma->sem_nsems;
-	if (!ipc_rcu_getref(sma)) {
+	if (!ipc_rcu_getref(&sma->sem_perm)) {
 		rcu_read_unlock();
 		un = ERR_PTR(-EIDRM);
 		goto out;
@@ -1729,7 +1724,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	/* step 2: allocate new undo structure */
 	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
-		ipc_rcu_putref(sma, sem_rcu_free);
+		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -1932,7 +1927,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	 */
 	if (nsops == 1) {
 		struct sem *curr;
-		curr = &sma->sem_base[sops->sem_num];
+		curr = &sma->sems[sops->sem_num];
 
 		if (alter) {
 			if (sma->complex_count) {
@@ -2146,7 +2141,7 @@ void exit_sem(struct task_struct *tsk)
 
 		/* perform adjustments registered in un */
 		for (i = 0; i < sma->sem_nsems; i++) {
-			struct sem *semaphore = &sma->sem_base[i];
+			struct sem *semaphore = &sma->sems[i];
 			if (un->semadj[i]) {
 				semaphore->semval += un->semadj[i];
 				/*

diff --git a/ipc/shm.c b/ipc/shm.c
index f45c795..28a4448 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c

@@ -174,11 +174,12 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
 
 static void shm_rcu_free(struct rcu_head *head)
 {
-	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
-	struct shmid_kernel *shp = ipc_rcu_to_struct(p);
-
+	struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
+							rcu);
+	struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
+							shm_perm);
 	security_shm_free(shp);
-	ipc_rcu_free(head);
+	kvfree(shp);
 }
 
 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
@@ -241,7 +242,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 		user_shm_unlock(i_size_read(file_inode(shm_file)),
 				shp->mlock_user);
 	fput(shm_file);
-	ipc_rcu_putref(shp, shm_rcu_free);
+	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
 }
 
 /*
@@ -529,7 +530,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	struct file *file;
 	char name[13];
-	int id;
 	vm_flags_t acctflag = 0;
 
 	if (size < SHMMIN || size > ns->shm_ctlmax)
@@ -542,8 +542,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 			ns->shm_tot + numpages > ns->shm_ctlall)
 		return -ENOSPC;
 
-	shp = ipc_rcu_alloc(sizeof(*shp));
-	if (!shp)
+	shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
+	if (unlikely(!shp))
 		return -ENOMEM;
 
 	shp->shm_perm.key = key;
@@ -553,7 +553,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	shp->shm_perm.security = NULL;
 	error = security_shm_alloc(shp);
 	if (error) {
-		ipc_rcu_putref(shp, ipc_rcu_free);
+		kvfree(shp);
 		return error;
 	}
 
@@ -598,11 +598,9 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	shp->shm_file = file;
 	shp->shm_creator = current;
 
-	id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
-	if (id < 0) {
-		error = id;
+	error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
+	if (error < 0)
 		goto no_id;
-	}
 
 	list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
 
@@ -624,7 +622,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 		user_shm_unlock(size, shp->mlock_user);
 	fput(file);
 no_file:
-	ipc_rcu_putref(shp, shm_rcu_free);
+	call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
 	return error;
 }
 

diff --git a/ipc/util.c b/ipc/util.c
index caec7b1..1a2cb02 100644
--- a/ipc/util.c
+++ b/ipc/util.c

@@ -232,6 +232,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
 
 	idr_preload(GFP_KERNEL);
 
+	atomic_set(&new->refcount, 1);
 	spin_lock_init(&new->lock);
 	new->deleted = false;
 	rcu_read_lock();
@@ -394,70 +395,18 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
 	ipcp->deleted = true;
 }
 
-/**
- * ipc_alloc -	allocate ipc space
- * @size: size desired
- *
- * Allocate memory from the appropriate pools and return a pointer to it.
- * NULL is returned if the allocation fails
- */
-void *ipc_alloc(int size)
+int ipc_rcu_getref(struct kern_ipc_perm *ptr)
 {
-	return kvmalloc(size, GFP_KERNEL);
+	return atomic_inc_not_zero(&ptr->refcount);
 }
 
-/**
- * ipc_free - free ipc space
- * @ptr: pointer returned by ipc_alloc
- *
- * Free a block created with ipc_alloc().
- */
-void ipc_free(void *ptr)
+void ipc_rcu_putref(struct kern_ipc_perm *ptr,
+			void (*func)(struct rcu_head *head))
 {
-	kvfree(ptr);
-}
-
-/**
- * ipc_rcu_alloc - allocate ipc and rcu space
- * @size: size desired
- *
- * Allocate memory for the rcu header structure +  the object.
- * Returns the pointer to the object or NULL upon failure.
- */
-void *ipc_rcu_alloc(int size)
-{
-	/*
-	 * We prepend the allocation with the rcu struct
-	 */
-	struct ipc_rcu *out = ipc_alloc(sizeof(struct ipc_rcu) + size);
-	if (unlikely(!out))
-		return NULL;
-	atomic_set(&out->refcount, 1);
-	return out + 1;
-}
-
-int ipc_rcu_getref(void *ptr)
-{
-	struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
-
-	return atomic_inc_not_zero(&p->refcount);
-}
-
-void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head))
-{
-	struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
-
-	if (!atomic_dec_and_test(&p->refcount))
+	if (!atomic_dec_and_test(&ptr->refcount))
 		return;
 
-	call_rcu(&p->rcu, func);
-}
-
-void ipc_rcu_free(struct rcu_head *head)
-{
-	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
-
-	kvfree(p);
+	call_rcu(&ptr->rcu, func);
 }
 
 /**

diff --git a/ipc/util.h b/ipc/util.h
index 60ddccc..c692010 100644
--- a/ipc/util.h
+++ b/ipc/util.h

@@ -47,13 +47,6 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { }
 static inline void shm_exit_ns(struct ipc_namespace *ns) { }
 #endif
 
-struct ipc_rcu {
-	struct rcu_head rcu;
-	atomic_t refcount;
-} ____cacheline_aligned_in_smp;
-
-#define ipc_rcu_to_struct(p)  ((void *)(p+1))
-
 /*
  * Structure that holds the parameters needed by the ipc operations
  * (see after)
@@ -114,22 +107,18 @@ void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *);
 /* must be called with ipcp locked */
 int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
 
-/* for rare, potentially huge allocations.
- * both function can sleep
- */
-void *ipc_alloc(int size);
-void ipc_free(void *ptr);
-
 /*
  * For allocation that need to be freed by RCU.
  * Objects are reference counted, they start with reference count 1.
  * getref increases the refcount, the putref call that reduces the recount
  * to 0 schedules the rcu destruction. Caller must guarantee locking.
+ *
+ * refcount is initialized by ipc_addid(), before that point call_rcu()
+ * must be used.
  */
-void *ipc_rcu_alloc(int size);
-int ipc_rcu_getref(void *ptr);
-void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
-void ipc_rcu_free(struct rcu_head *head);
+int ipc_rcu_getref(struct kern_ipc_perm *ptr);
+void ipc_rcu_putref(struct kern_ipc_perm *ptr,
+			void (*func)(struct rcu_head *head));
 
 struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
 struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id);

diff --git a/kernel/Makefile b/kernel/Makefile
index 72aa080..4cb8e8b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile

@@ -82,7 +82,7 @@
 obj-$(CONFIG_KGDB) += debug/
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
-obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index fcbd568..6db80fc 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c

@@ -14,10 +14,12 @@
 #include <asm/sections.h>
 
 /* vmcoreinfo stuff */
-static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
-u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
-size_t vmcoreinfo_size;
-size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
+static unsigned char *vmcoreinfo_data;
+static size_t vmcoreinfo_size;
+u32 *vmcoreinfo_note;
+
+/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
+static unsigned char *vmcoreinfo_data_safecopy;
 
 /*
  * parsing the "crashkernel" commandline
@@ -324,8 +326,23 @@ static void update_vmcoreinfo_note(void)
 	final_note(buf);
 }
 
+void crash_update_vmcoreinfo_safecopy(void *ptr)
+{
+	if (ptr)	/* non-NULL: snapshot the first vmcoreinfo_size bytes into ptr */
+		memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);
+	/* Remember ptr (may be NULL); crash_save_vmcoreinfo() uses it when set. */
+	vmcoreinfo_data_safecopy = ptr;
+}
+
 void crash_save_vmcoreinfo(void)
 {
+	if (!vmcoreinfo_note)
+		return;
+
+	/* Use the safe copy to generate vmcoreinfo note if have */
+	if (vmcoreinfo_data_safecopy)
+		vmcoreinfo_data = vmcoreinfo_data_safecopy;
+
 	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
 	update_vmcoreinfo_note();
 }
@@ -340,7 +357,7 @@ void vmcoreinfo_append_str(const char *fmt, ...)
 	r = vscnprintf(buf, sizeof(buf), fmt, args);
 	va_end(args);
 
-	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
+	r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size);
 
 	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
 
@@ -356,11 +373,26 @@ void __weak arch_crash_save_vmcoreinfo(void)
 
 phys_addr_t __weak paddr_vmcoreinfo_note(void)
 {
-	return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
+	return __pa(vmcoreinfo_note);
 }
 
 static int __init crash_save_vmcoreinfo_init(void)
 {
+	vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
+	if (!vmcoreinfo_data) {
+		pr_warn("Memory allocation for vmcoreinfo_data failed\n");
+		return -ENOMEM;
+	}
+
+	vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
+						GFP_KERNEL | __GFP_ZERO);
+	if (!vmcoreinfo_note) {
+		free_page((unsigned long)vmcoreinfo_data);
+		vmcoreinfo_data = NULL;
+		pr_warn("Memory allocation for vmcoreinfo_note failed\n");
+		return -ENOMEM;
+	}
+
 	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
 	VMCOREINFO_PAGESIZE(PAGE_SIZE);
 

diff --git a/kernel/exit.c b/kernel/exit.c
index 608c977..c5548fa 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c

@@ -1639,6 +1639,10 @@ long kernel_wait4(pid_t upid, int __user *stat_addr, int options,
 			__WNOTHREAD|__WCLONE|__WALL))
 		return -EINVAL;
 
+	/* -INT_MIN is not defined */
+	if (upid == INT_MIN)
+		return -ESRCH;
+
 	if (upid == -1)
 		type = PIDTYPE_MAX;
 	else if (upid < 0) {

diff --git a/kernel/extable.c b/kernel/extable.c
index 223df4a..38c2412 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c

@@ -55,7 +55,8 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
 {
 	const struct exception_table_entry *e;
 
-	e = search_extable(__start___ex_table, __stop___ex_table-1, addr);
+	e = search_extable(__start___ex_table,
+			   __stop___ex_table - __start___ex_table, addr);
 	if (!e)
 		e = search_module_extables(addr);
 	return e;

diff --git a/kernel/fork.c b/kernel/fork.c
index 0f69a3e..17921b0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c

@@ -205,19 +205,17 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 	void *stack;
 	int i;
 
-	local_irq_disable();
 	for (i = 0; i < NR_CACHED_STACKS; i++) {
-		struct vm_struct *s = this_cpu_read(cached_stacks[i]);
+		struct vm_struct *s;
+
+		s = this_cpu_xchg(cached_stacks[i], NULL);
 
 		if (!s)
 			continue;
-		this_cpu_write(cached_stacks[i], NULL);
 
 		tsk->stack_vm_area = s;
-		local_irq_enable();
 		return s->addr;
 	}
-	local_irq_enable();
 
 	stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
 				     VMALLOC_START, VMALLOC_END,
@@ -245,19 +243,15 @@ static inline void free_thread_stack(struct task_struct *tsk)
 {
 #ifdef CONFIG_VMAP_STACK
 	if (task_stack_vm_area(tsk)) {
-		unsigned long flags;
 		int i;
 
-		local_irq_save(flags);
 		for (i = 0; i < NR_CACHED_STACKS; i++) {
-			if (this_cpu_read(cached_stacks[i]))
+			if (this_cpu_cmpxchg(cached_stacks[i],
+					NULL, tsk->stack_vm_area) != NULL)
 				continue;
 
-			this_cpu_write(cached_stacks[i], tsk->stack_vm_area);
-			local_irq_restore(flags);
 			return;
 		}
-		local_irq_restore(flags);
 
 		vfree_atomic(tsk->stack);
 		return;
@@ -560,7 +554,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	set_task_stack_end_magic(tsk);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
-	tsk->stack_canary = get_random_long();
+	tsk->stack_canary = get_random_canary();
 #endif
 
 	/*
@@ -579,6 +573,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 
 	kcov_task_init(tsk);
 
+#ifdef CONFIG_FAULT_INJECTION
+	tsk->fail_nth = 0;
+#endif
+
 	return tsk;
 
 free_stack:

diff --git a/kernel/groups.c b/kernel/groups.c
index d097276..434f666 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c

@@ -5,6 +5,7 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/security.h>
+#include <linux/sort.h>
 #include <linux/syscalls.h>
 #include <linux/user_namespace.h>
 #include <linux/vmalloc.h>
@@ -76,32 +77,18 @@ static int groups_from_user(struct group_info *group_info,
 	return 0;
 }
 
-/* a simple Shell sort */
+static int gid_cmp(const void *_a, const void *_b)
+{
+	kgid_t a = *(kgid_t *)_a;	/* both arguments are read as kgid_t elements */
+	kgid_t b = *(kgid_t *)_b;
+	/* Comparator handed to sort() by groups_sort(): gid_gt() minus gid_lt(). */
+	return gid_gt(a, b) - gid_lt(a, b);
+}
+
 static void groups_sort(struct group_info *group_info)
 {
-	int base, max, stride;
-	int gidsetsize = group_info->ngroups;
-
-	for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
-		; /* nothing */
-	stride /= 3;
-
-	while (stride) {
-		max = gidsetsize - stride;
-		for (base = 0; base < max; base++) {
-			int left = base;
-			int right = left + stride;
-			kgid_t tmp = group_info->gid[right];
-
-			while (left >= 0 && gid_gt(group_info->gid[left], tmp)) {
-				group_info->gid[right] = group_info->gid[left];
-				right = left;
-				left -= stride;
-			}
-			group_info->gid[right] = tmp;
-		}
-		stride /= 3;
-	}
+	sort(group_info->gid, group_info->ngroups, sizeof(*group_info->gid),
+	     gid_cmp, NULL);
 }
 
 /* a simple bsearch */

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 6a3b249..127e7cf 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c

@@ -28,12 +28,6 @@
 
 #include <asm/sections.h>
 
-#ifdef CONFIG_KALLSYMS_ALL
-#define all_var 1
-#else
-#define all_var 0
-#endif
-
 /*
  * These will be re-linked against their real values
  * during the second link stage.
@@ -82,7 +76,7 @@ static inline int is_kernel(unsigned long addr)
 
 static int is_ksym_addr(unsigned long addr)
 {
-	if (all_var)
+	if (IS_ENABLED(CONFIG_KALLSYMS_ALL))
 		return is_kernel(addr);
 
 	return is_kernel_text(addr) || is_kernel_inittext(addr);
@@ -280,7 +274,7 @@ static unsigned long get_symbol_pos(unsigned long addr,
 	if (!symbol_end) {
 		if (is_kernel_inittext(addr))
 			symbol_end = (unsigned long)_einittext;
-		else if (all_var)
+		else if (IS_ENABLED(CONFIG_KALLSYMS_ALL))
 			symbol_end = (unsigned long)_end;
 		else
 			symbol_end = (unsigned long)_etext;

diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index 3a47fa9..ea34ed8 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c

@@ -11,6 +11,10 @@
 #include <linux/bug.h>
 #include <linux/err.h>
 #include <linux/kcmp.h>
+#include <linux/capability.h>
+#include <linux/list.h>
+#include <linux/eventpoll.h>
+#include <linux/file.h>
 
 #include <asm/unistd.h>
 
@@ -94,6 +98,56 @@ static int kcmp_lock(struct mutex *m1, struct mutex *m2)
 	return err;
 }
 
+#ifdef CONFIG_EPOLL
+/* Compare @task1's fd @idx1 with the file @uslot names inside @task2. */
+static int kcmp_epoll_target(struct task_struct *task1,
+			     struct task_struct *task2,
+			     unsigned long idx1,
+			     struct kcmp_epoll_slot __user *uslot)
+{
+	struct file *filp, *filp_epoll, *filp_tgt;
+	struct kcmp_epoll_slot slot;
+	struct files_struct *files;
+
+	if (copy_from_user(&slot, uslot, sizeof(slot)))
+		return -EFAULT;
+
+	filp = get_file_raw_ptr(task1, idx1);
+	if (!filp)
+		return -EBADF;
+
+	files = get_files_struct(task2);
+	if (!files)
+		return -EBADF;
+
+	spin_lock(&files->file_lock);
+	filp_epoll = fcheck_files(files, slot.efd);
+	if (filp_epoll)
+		get_file(filp_epoll);
+	spin_unlock(&files->file_lock);
+	put_files_struct(files);
+
+	if (!filp_epoll)
+		return -EBADF;
+
+	filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
+	fput(filp_epoll);
+	/* A failed target lookup must not reach kcmp_ptr(). */
+	if (IS_ERR(filp_tgt))
+		return PTR_ERR(filp_tgt);
+
+	return kcmp_ptr(filp, filp_tgt, KCMP_FILE);
+}
+#else
+static int kcmp_epoll_target(struct task_struct *task1,
+			     struct task_struct *task2,
+			     unsigned long idx1,
+			     struct kcmp_epoll_slot __user *uslot)
+{
+	return -EOPNOTSUPP;	/* stub used when CONFIG_EPOLL is not set */
+}
+#endif
+
 SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
 		unsigned long, idx1, unsigned long, idx2)
 {
@@ -165,6 +219,9 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
 		ret = -EOPNOTSUPP;
 #endif
 		break;
+	case KCMP_EPOLL_TFD:
+		ret = kcmp_epoll_target(task1, task2, idx1, (void *)idx2);
+		break;
 	default:
 		ret = -EINVAL;
 		break;

diff --git a/kernel/kexec.c b/kernel/kexec.c
index 980936a..e62ec4d 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c

@@ -144,6 +144,14 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
 	if (ret)
 		goto out;
 
+	/*
+	 * Some architectures (like S390) may touch the crash memory before
+	 * machine_kexec_prepare(), so we must copy vmcoreinfo data after it.
+	 */
+	ret = kimage_crash_copy_vmcoreinfo(image);
+	if (ret)
+		goto out;
+
 	for (i = 0; i < nr_segments; i++) {
 		ret = kimage_load_segment(image, &image->segment[i]);
 		if (ret)

diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 154ffb4..1ae7c41 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c

@@ -482,6 +482,40 @@ struct page *kimage_alloc_control_pages(struct kimage *image,
 	return pages;
 }
 
+/* Set up the vmcoreinfo safe copy for a crash image; no-op for other types. */
+int kimage_crash_copy_vmcoreinfo(struct kimage *image)
+{
+	struct page *vmcoreinfo_page;
+	void *safecopy;
+
+	if (image->type != KEXEC_TYPE_CRASH)
+		return 0;
+
+	/*
+	 * For kdump, allocate one vmcoreinfo safe copy from the crash
+	 * memory. Because arch_kexec_protect_crashkres() runs after the
+	 * kexec syscall, that memory is naturally protected from write
+	 * (even read) access through the kernel direct mapping. We still
+	 * need to write to it when a crash happens, to generate the
+	 * vmcoreinfo note, so we rely on a separate vmap() mapping.
+	 */
+	vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
+	if (!vmcoreinfo_page) {
+		pr_warn("Could not allocate vmcoreinfo buffer\n");
+		return -ENOMEM;
+	}
+	safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
+	if (!safecopy) {
+		pr_warn("Could not vmap vmcoreinfo buffer\n");
+		return -ENOMEM;
+	}
+
+	image->vmcoreinfo_data_copy = safecopy;
+	crash_update_vmcoreinfo_safecopy(safecopy);
+
+	return 0;
+}
+
 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
 {
 	if (*image->entry != 0)
@@ -569,6 +603,11 @@ void kimage_free(struct kimage *image)
 	if (!image)
 		return;
 
+	if (image->vmcoreinfo_data_copy) {
+		crash_update_vmcoreinfo_safecopy(NULL);
+		vunmap(image->vmcoreinfo_data_copy);
+	}
+
 	kimage_free_extra_pages(image);
 	for_each_kimage_entry(image, ptr, entry) {
 		if (entry & IND_INDIRECTION) {

diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 766e7e4..9f48f44 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c

@@ -26,13 +26,6 @@
 #include <linux/vmalloc.h>
 #include "kexec_internal.h"
 
-/*
- * Declare these symbols weak so that if architecture provides a purgatory,
- * these will be overridden.
- */
-char __weak kexec_purgatory[0];
-size_t __weak kexec_purgatory_size = 0;
-
 static int kexec_calculate_store_digests(struct kimage *image);
 
 /* Architectures can provide this probe function */
@@ -298,6 +291,14 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
 	if (ret)
 		goto out;
 
+	/*
+	 * Some architectures (like S390) may touch the crash memory before
+	 * machine_kexec_prepare(), so we must copy vmcoreinfo data after it.
+	 */
+	ret = kimage_crash_copy_vmcoreinfo(image);
+	if (ret)
+		goto out;
+
 	ret = kexec_calculate_store_digests(image);
 	if (ret)
 		goto out;

diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 799a8a452..50dfcb0 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h

@@ -17,6 +17,8 @@ extern struct mutex kexec_mutex;
 #ifdef CONFIG_KEXEC_FILE
 #include <linux/purgatory.h>
 void kimage_file_post_load_cleanup(struct kimage *image);
+extern char kexec_purgatory[];
+extern size_t kexec_purgatory_size;
 #else /* CONFIG_KEXEC_FILE */
 static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
 #endif /* CONFIG_KEXEC_FILE */

diff --git a/kernel/kmod.c b/kernel/kmod.c
index 563f97e..ff68198 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c

@@ -45,8 +45,6 @@
 
 #include <trace/events/module.h>
 
-extern int max_threads;
-
 #define CAP_BSET	(void *)1
 #define CAP_PI		(void *)2
 
@@ -56,6 +54,20 @@ static DEFINE_SPINLOCK(umh_sysctl_lock);
 static DECLARE_RWSEM(umhelper_sem);
 
 #ifdef CONFIG_MODULES
+/*
+ * Assuming:
+ *
+ * threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE,
+ *		       (u64) THREAD_SIZE * 8UL);
+ *
+ * Needing fewer than 50 threads would mean we're dealing with systems
+ * smaller than 3200 pages. This assumes you are capable of having ~13M memory,
+ * and this would only be an upper limit, after which the OOM killer
+ * would take effect. Systems like these are very unlikely if modules are
+ * enabled.
+ */
+#define MAX_KMOD_CONCURRENT 50
+static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT);
 
 /*
 	modprobe_path is set via /proc/sys.
@@ -127,10 +139,7 @@ int __request_module(bool wait, const char *fmt, ...)
 {
 	va_list args;
 	char module_name[MODULE_NAME_LEN];
-	unsigned int max_modprobes;
 	int ret;
-	static atomic_t kmod_concurrent = ATOMIC_INIT(0);
-#define MAX_KMOD_CONCURRENT 50	/* Completely arbitrary value - KAO */
 	static int kmod_loop_msg;
 
 	/*
@@ -154,21 +163,7 @@ int __request_module(bool wait, const char *fmt, ...)
 	if (ret)
 		return ret;
 
-	/* If modprobe needs a service that is in a module, we get a recursive
-	 * loop.  Limit the number of running kmod threads to max_threads/2 or
-	 * MAX_KMOD_CONCURRENT, whichever is the smaller.  A cleaner method
-	 * would be to run the parents of this process, counting how many times
-	 * kmod was invoked.  That would mean accessing the internals of the
-	 * process tables to get the command line, proc_pid_cmdline is static
-	 * and it is not worth changing the proc code just to handle this case. 
-	 * KAO.
-	 *
-	 * "trace the ppid" is simple, but will fail if someone's
-	 * parent exits.  I think this is as good as it gets. --RR
-	 */
-	max_modprobes = min(max_threads/2, MAX_KMOD_CONCURRENT);
-	atomic_inc(&kmod_concurrent);
-	if (atomic_read(&kmod_concurrent) > max_modprobes) {
+	if (atomic_dec_if_positive(&kmod_concurrent_max) < 0) {
 		/* We may be blaming an innocent here, but unlikely */
 		if (kmod_loop_msg < 5) {
 			printk(KERN_ERR
@@ -176,7 +171,6 @@ int __request_module(bool wait, const char *fmt, ...)
 			       module_name);
 			kmod_loop_msg++;
 		}
-		atomic_dec(&kmod_concurrent);
 		return -ENOMEM;
 	}
 
@@ -184,10 +178,12 @@ int __request_module(bool wait, const char *fmt, ...)
 
 	ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
 
-	atomic_dec(&kmod_concurrent);
+	atomic_inc(&kmod_concurrent_max);
+
 	return ret;
 }
 EXPORT_SYMBOL(__request_module);
+
 #endif /* CONFIG_MODULES */
 
 static void call_usermodehelper_freeinfo(struct subprocess_info *info)

diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 23cd706..46ba853 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c

@@ -134,7 +134,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
 {
 	phys_addr_t vmcore_base = paddr_vmcoreinfo_note();
 	return sprintf(buf, "%pa %x\n", &vmcore_base,
-		       (unsigned int)sizeof(vmcoreinfo_note));
+			(unsigned int)VMCOREINFO_NOTE_SIZE);
 }
 KERNEL_ATTR_RO(vmcoreinfo);
 
@@ -234,7 +234,7 @@ static struct attribute * kernel_attrs[] = {
 	NULL
 };
 
-static struct attribute_group kernel_attr_group = {
+static const struct attribute_group kernel_attr_group = {
 	.attrs = kernel_attrs,
 };
 

diff --git a/kernel/module.c b/kernel/module.c
index b3dbdde..40f983c 100644
--- a/kernel/module.c
+++ b/kernel/module.c

@@ -300,6 +300,7 @@ int unregister_module_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL(unregister_module_notifier);
 
 struct load_info {
+	const char *name;
 	Elf_Ehdr *hdr;
 	unsigned long len;
 	Elf_Shdr *sechdrs;
@@ -600,7 +601,7 @@ static struct module *find_module_all(const char *name, size_t len,
 
 	module_assert_mutex_or_preempt();
 
-	list_for_each_entry(mod, &modules, list) {
+	list_for_each_entry_rcu(mod, &modules, list) {
 		if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
 			continue;
 		if (strlen(mod->name) == len && !memcmp(mod->name, name, len))
@@ -1273,12 +1274,13 @@ static u32 resolve_rel_crc(const s32 *crc)
 	return *(u32 *)((void *)crc + *crc);
 }
 
-static int check_version(Elf_Shdr *sechdrs,
-			 unsigned int versindex,
+static int check_version(const struct load_info *info,
 			 const char *symname,
 			 struct module *mod,
 			 const s32 *crc)
 {
+	Elf_Shdr *sechdrs = info->sechdrs;
+	unsigned int versindex = info->index.vers;
 	unsigned int i, num_versions;
 	struct modversion_info *versions;
 
@@ -1312,17 +1314,16 @@ static int check_version(Elf_Shdr *sechdrs,
 	}
 
 	/* Broken toolchain. Warn once, then let it go.. */
-	pr_warn_once("%s: no symbol version for %s\n", mod->name, symname);
+	pr_warn_once("%s: no symbol version for %s\n", info->name, symname);
 	return 1;
 
 bad_version:
 	pr_warn("%s: disagrees about version of symbol %s\n",
-	       mod->name, symname);
+	       info->name, symname);
 	return 0;
 }
 
-static inline int check_modstruct_version(Elf_Shdr *sechdrs,
-					  unsigned int versindex,
+static inline int check_modstruct_version(const struct load_info *info,
 					  struct module *mod)
 {
 	const s32 *crc;
@@ -1338,8 +1339,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
 		BUG();
 	}
 	preempt_enable();
-	return check_version(sechdrs, versindex,
-			     VMLINUX_SYMBOL_STR(module_layout), mod, crc);
+	return check_version(info, VMLINUX_SYMBOL_STR(module_layout),
+			     mod, crc);
 }
 
 /* First part is kernel version, which we ignore if module has crcs. */
@@ -1353,8 +1354,7 @@ static inline int same_magic(const char *amagic, const char *bmagic,
 	return strcmp(amagic, bmagic) == 0;
 }
 #else
-static inline int check_version(Elf_Shdr *sechdrs,
-				unsigned int versindex,
+static inline int check_version(const struct load_info *info,
 				const char *symname,
 				struct module *mod,
 				const s32 *crc)
@@ -1362,8 +1362,7 @@ static inline int check_version(Elf_Shdr *sechdrs,
 	return 1;
 }
 
-static inline int check_modstruct_version(Elf_Shdr *sechdrs,
-					  unsigned int versindex,
+static inline int check_modstruct_version(const struct load_info *info,
 					  struct module *mod)
 {
 	return 1;
@@ -1399,7 +1398,7 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod,
 	if (!sym)
 		goto unlock;
 
-	if (!check_version(info->sechdrs, info->index.vers, name, mod, crc)) {
+	if (!check_version(info, name, mod, crc)) {
 		sym = ERR_PTR(-EINVAL);
 		goto getname;
 	}
@@ -1662,21 +1661,6 @@ static inline void remove_notes_attrs(struct module *mod)
 }
 #endif /* CONFIG_KALLSYMS */
 
-static void add_usage_links(struct module *mod)
-{
-#ifdef CONFIG_MODULE_UNLOAD
-	struct module_use *use;
-	int nowarn;
-
-	mutex_lock(&module_mutex);
-	list_for_each_entry(use, &mod->target_list, target_list) {
-		nowarn = sysfs_create_link(use->target->holders_dir,
-					   &mod->mkobj.kobj, mod->name);
-	}
-	mutex_unlock(&module_mutex);
-#endif
-}
-
 static void del_usage_links(struct module *mod)
 {
 #ifdef CONFIG_MODULE_UNLOAD
@@ -1689,6 +1673,26 @@ static void del_usage_links(struct module *mod)
 #endif
 }
 
+static int add_usage_links(struct module *mod)
+{
+	int ret = 0;
+#ifdef CONFIG_MODULE_UNLOAD
+	struct module_use *use;
+	/* Link @mod into the holders_dir of each use->target on target_list. */
+	mutex_lock(&module_mutex);
+	list_for_each_entry(use, &mod->target_list, target_list) {
+		ret = sysfs_create_link(use->target->holders_dir,
+					&mod->mkobj.kobj, mod->name);
+		if (ret)
+			break;	/* the first failure aborts the walk */
+	}
+	mutex_unlock(&module_mutex);
+	if (ret)
+		del_usage_links(mod);
+#endif
+	return ret;
+}
+
 static int module_add_modinfo_attrs(struct module *mod)
 {
 	struct module_attribute *attr;
@@ -1797,13 +1801,18 @@ static int mod_sysfs_setup(struct module *mod,
 	if (err)
 		goto out_unreg_param;
 
-	add_usage_links(mod);
+	err = add_usage_links(mod);
+	if (err)
+		goto out_unreg_modinfo_attrs;
+
 	add_sect_attrs(mod, info);
 	add_notes_attrs(mod, info);
 
 	kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
 	return 0;
 
+out_unreg_modinfo_attrs:
+	module_remove_modinfo_attrs(mod);
 out_unreg_param:
 	module_param_sysfs_remove(mod);
 out_unreg_holders:
@@ -2910,9 +2919,15 @@ static int rewrite_section_headers(struct load_info *info, int flags)
 		info->index.vers = 0; /* Pretend no __versions section! */
 	else
 		info->index.vers = find_sec(info, "__versions");
-	info->index.info = find_sec(info, ".modinfo");
-	info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
 	info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
+
+	info->index.info = find_sec(info, ".modinfo");
+	if (!info->index.info)
+		info->name = "(missing .modinfo section)";
+	else
+		info->name = get_modinfo(info, "name");
+	info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
+
 	return 0;
 }
 
@@ -2952,21 +2967,29 @@ static struct module *setup_load_info(struct load_info *info, int flags)
 
 	info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
 	if (!info->index.mod) {
-		pr_warn("No module found in object\n");
+		pr_warn("%s: No module found in object\n",
+			info->name ?: "(missing .modinfo name field)");
 		return ERR_PTR(-ENOEXEC);
 	}
 	/* This is temporary: point mod into copy of data. */
 	mod = (void *)info->sechdrs[info->index.mod].sh_addr;
 
+	/*
+	 * If we didn't load the .modinfo 'name' field, fall back to
+	 * on-disk struct mod 'name' field.
+	 */
+	if (!info->name)
+		info->name = mod->name;
+
 	if (info->index.sym == 0) {
-		pr_warn("%s: module has no symbols (stripped?)\n", mod->name);
+		pr_warn("%s: module has no symbols (stripped?)\n", info->name);
 		return ERR_PTR(-ENOEXEC);
 	}
 
 	info->index.pcpu = find_pcpusec(info);
 
 	/* Check module struct version now, before we try to use module. */
-	if (!check_modstruct_version(info->sechdrs, info->index.vers, mod))
+	if (!check_modstruct_version(info, mod))
 		return ERR_PTR(-ENOEXEC);
 
 	return mod;
@@ -2987,7 +3010,7 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags)
 			return err;
 	} else if (!same_magic(modmagic, vermagic, info->index.vers)) {
 		pr_err("%s: version magic '%s' should be '%s'\n",
-		       mod->name, modmagic, vermagic);
+		       info->name, modmagic, vermagic);
 		return -ENOEXEC;
 	}
 
@@ -3237,7 +3260,7 @@ int __weak module_frob_arch_sections(Elf_Ehdr *hdr,
 
 /* module_blacklist is a comma-separated list of module names */
 static char *module_blacklist;
-static bool blacklisted(char *module_name)
+static bool blacklisted(const char *module_name)
 {
 	const char *p;
 	size_t len;
@@ -3267,7 +3290,7 @@ static struct module *layout_and_allocate(struct load_info *info, int flags)
 	if (IS_ERR(mod))
 		return mod;
 
-	if (blacklisted(mod->name))
+	if (blacklisted(info->name))
 		return ERR_PTR(-EPERM);
 
 	err = check_modinfo(mod, info, flags);
@@ -4196,7 +4219,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
 		goto out;
 
 	e = search_extable(mod->extable,
-			   mod->extable + mod->num_exentries - 1,
+			   mod->num_exentries,
 			   addr);
 out:
 	preempt_enable();

diff --git a/kernel/power/main.c b/kernel/power/main.c
index d401c21..42bd800 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c

@@ -705,7 +705,7 @@ static struct attribute * g[] = {
 	NULL,
 };
 
-static struct attribute_group attr_group = {
+static const struct attribute_group attr_group = {
 	.attrs = g,
 };
 

diff --git a/kernel/signal.c b/kernel/signal.c
index 48a59ee..caed913 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c

@@ -1402,6 +1402,10 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
 		return ret;
 	}
 
+	/* -INT_MIN is undefined.  Exclude this case to avoid a UBSAN warning */
+	if (pid == INT_MIN)
+		return -ESRCH;
+
 	read_lock(&tasklist_lock);
 	if (pid != -1) {
 		ret = __kill_pgrp_info(sig, info,

diff --git a/kernel/sys.c b/kernel/sys.c
index 47d9015..2855ee7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c

@@ -1362,7 +1362,7 @@ COMPAT_SYSCALL_DEFINE2(getrlimit, unsigned int, resource,
 
 	ret = do_prlimit(current, resource, NULL, &r);
 	if (!ret) {
-		struct rlimit r32;
+		struct compat_rlimit r32;
 		if (r.rlim_cur > COMPAT_RLIM_INFINITY)
 			r32.rlim_cur = COMPAT_RLIM_INFINITY;
 		else
@@ -2360,7 +2360,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_GET_THP_DISABLE:
 		if (arg2 || arg3 || arg4 || arg5)
 			return -EINVAL;
-		error = !!(me->mm->def_flags & VM_NOHUGEPAGE);
+		error = !!test_bit(MMF_DISABLE_THP, &me->mm->flags);
 		break;
 	case PR_SET_THP_DISABLE:
 		if (arg3 || arg4 || arg5)
@@ -2368,9 +2368,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		if (down_write_killable(&me->mm->mmap_sem))
 			return -EINTR;
 		if (arg2)
-			me->mm->def_flags |= VM_NOHUGEPAGE;
+			set_bit(MMF_DISABLE_THP, &me->mm->flags);
 		else
-			me->mm->def_flags &= ~VM_NOHUGEPAGE;
+			clear_bit(MMF_DISABLE_THP, &me->mm->flags);
 		up_write(&me->mm->mmap_sem);
 		break;
 	case PR_MPX_ENABLE_MANAGEMENT:

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4dfba1a..6648fbb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c

@@ -174,11 +174,32 @@ extern int no_unaligned_warning;
 
 #ifdef CONFIG_PROC_SYSCTL
 
-#define SYSCTL_WRITES_LEGACY	-1
-#define SYSCTL_WRITES_WARN	 0
-#define SYSCTL_WRITES_STRICT	 1
+/**
+ * enum sysctl_writes_mode - supported sysctl write modes
+ *
+ * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
+ * 	to be written, and multiple writes on the same sysctl file descriptor
+ * 	will rewrite the sysctl value, regardless of file position. No warning
+ * 	is issued when the initial position is not 0.
+ * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
+ * 	not 0.
+ * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
+ * 	file position 0 and the value must be fully contained in the buffer
+ * 	sent to the write syscall. If dealing with strings, respect the file
+ * 	position, but restrict this to the max length of the buffer; anything
+ * 	past the max length will be ignored. Multiple writes will append
+ * 	to the buffer.
+ *
+ * These write modes control how current file position affects the behavior of
+ * updating sysctl values through the proc interface on each write.
+ */
+enum sysctl_writes_mode {
+	SYSCTL_WRITES_LEGACY		= -1,
+	SYSCTL_WRITES_WARN		= 0,
+	SYSCTL_WRITES_STRICT		= 1,
+};
 
-static int sysctl_writes_strict = SYSCTL_WRITES_STRICT;
+static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
 
 static int proc_do_cad_pid(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -880,6 +901,14 @@ static struct ctl_table kern_table[] = {
 #endif
 	},
 	{
+		.procname	= "watchdog_cpumask",
+		.data		= &watchdog_cpumask_bits,
+		.maxlen		= NR_CPUS,
+		.mode		= 0644,
+		.proc_handler	= proc_watchdog_cpumask,
+	},
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+	{
 		.procname       = "soft_watchdog",
 		.data           = &soft_watchdog_enabled,
 		.maxlen         = sizeof (int),
@@ -889,13 +918,6 @@ static struct ctl_table kern_table[] = {
 		.extra2		= &one,
 	},
 	{
-		.procname	= "watchdog_cpumask",
-		.data		= &watchdog_cpumask_bits,
-		.maxlen		= NR_CPUS,
-		.mode		= 0644,
-		.proc_handler	= proc_watchdog_cpumask,
-	},
-	{
 		.procname	= "softlockup_panic",
 		.data		= &softlockup_panic,
 		.maxlen		= sizeof(int),
@@ -904,17 +926,6 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-	{
-		.procname	= "hardlockup_panic",
-		.data		= &hardlockup_panic,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
-		.extra2		= &one,
-	},
-#endif
 #ifdef CONFIG_SMP
 	{
 		.procname	= "softlockup_all_cpu_backtrace",
@@ -925,6 +936,19 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#endif /* CONFIG_SMP */
+#endif
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+	{
+		.procname	= "hardlockup_panic",
+		.data		= &hardlockup_panic,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+#ifdef CONFIG_SMP
 	{
 		.procname	= "hardlockup_all_cpu_backtrace",
 		.data		= &sysctl_hardlockup_all_cpu_backtrace,
@@ -936,6 +960,8 @@ static struct ctl_table kern_table[] = {
 	},
 #endif /* CONFIG_SMP */
 #endif
+#endif
+
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 	{
 		.procname       = "unknown_nmi_panic",
@@ -1950,6 +1976,32 @@ static void warn_sysctl_write(struct ctl_table *table)
 }
 
 /**
+ * proc_first_pos_non_zero_ignore - check if first position is allowed
+ * @ppos: file position
+ * @table: the sysctl table
+ *
+ * Returns true if the first position is non-zero and the sysctl_writes_strict
+ * mode indicates this is not allowed for numeric input types. String proc
+ * handlers can ignore the return value.
+ */
+static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
+					   struct ctl_table *table)
+{
+	const enum sysctl_writes_mode mode = sysctl_writes_strict;
+
+	/* Writes that start at offset 0 are always fine. */
+	if (*ppos == 0)
+		return false;
+
+	if (mode == SYSCTL_WRITES_STRICT)
+		return true;
+	if (mode == SYSCTL_WRITES_WARN)
+		warn_sysctl_write(table);
+
+	return false;
+}
+
+/**
  * proc_dostring - read a string sysctl
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
@@ -1969,8 +2021,8 @@ static void warn_sysctl_write(struct ctl_table *table)
 int proc_dostring(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	if (write && *ppos && sysctl_writes_strict == SYSCTL_WRITES_WARN)
-		warn_sysctl_write(table);
+	if (write)
+		proc_first_pos_non_zero_ignore(ppos, table);
 
 	return _proc_do_string((char *)(table->data), table->maxlen, write,
 			       (char __user *)buffer, lenp, ppos);
@@ -2128,19 +2180,18 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
 	return 0;
 }
 
-static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
-				 int *valp,
-				 int write, void *data)
+static int do_proc_douintvec_conv(unsigned long *lvalp,
+				  unsigned int *valp,
+				  int write, void *data)
 {
 	if (write) {
-		if (*negp)
+		if (*lvalp > UINT_MAX)
 			return -EINVAL;
 		if (*lvalp > UINT_MAX)
 			return -EINVAL;
 		*valp = *lvalp;
 	} else {
 		unsigned int val = *valp;
-		*negp = false;
 		*lvalp = (unsigned long)val;
 	}
 	return 0;
@@ -2172,17 +2223,8 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
 		conv = do_proc_dointvec_conv;
 
 	if (write) {
-		if (*ppos) {
-			switch (sysctl_writes_strict) {
-			case SYSCTL_WRITES_STRICT:
-				goto out;
-			case SYSCTL_WRITES_WARN:
-				warn_sysctl_write(table);
-				break;
-			default:
-				break;
-			}
-		}
+		if (proc_first_pos_non_zero_ignore(ppos, table))
+			goto out;
 
 		if (left > PAGE_SIZE - 1)
 			left = PAGE_SIZE - 1;
@@ -2249,6 +2291,146 @@ static int do_proc_dointvec(struct ctl_table *table, int write,
 			buffer, lenp, ppos, conv, data);
 }
 
+/* Write side of __do_proc_douintvec(): parse a single value from @buffer. */
+static int do_proc_douintvec_w(unsigned int *tbl_data,
+			       struct ctl_table *table,
+			       void __user *buffer,
+			       size_t *lenp, loff_t *ppos,
+			       int (*conv)(unsigned long *lvalp,
+					   unsigned int *valp,
+					   int write, void *data),
+			       void *data)
+{
+	unsigned long lval;
+	int err = 0;
+	size_t left;
+	bool neg;
+	char *kbuf = NULL, *p;
+
+	left = *lenp;
+
+	if (proc_first_pos_non_zero_ignore(ppos, table))
+		goto bail_early;
+
+	if (left > PAGE_SIZE - 1)
+		left = PAGE_SIZE - 1;
+
+	p = kbuf = memdup_user_nul(buffer, left);
+	if (IS_ERR(kbuf))
+		return -EINVAL;
+
+	left -= proc_skip_spaces(&p);
+	if (!left) {
+		err = -EINVAL;
+		goto out_free;
+	}
+
+	err = proc_get_long(&p, &left, &lval, &neg,
+			     proc_wspace_sep,
+			     sizeof(proc_wspace_sep), NULL);
+	if (err || neg) {
+		err = -EINVAL;
+		goto out_free;
+	}
+
+	if (conv(&lval, tbl_data, 1, data)) {
+		err = -EINVAL;
+		goto out_free;
+	}
+	/* err is 0 here, and left is not read again after this point. */
+	if (!err && left)
+		left -= proc_skip_spaces(&p);
+
+out_free:
+	kfree(kbuf);
+	if (err)
+		return -EINVAL;
+	return 0;
+
+	/* This is in keeping with old __do_proc_dointvec() */
+bail_early:
+	*ppos += *lenp;
+	return err;
+}
+
+/*
+ * Read side of __do_proc_douintvec(): put the value and '\n' into @buffer,
+ * then account the produced bytes in *@lenp and *@ppos.
+ */
+static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
+			       size_t *lenp, loff_t *ppos,
+			       int (*conv)(unsigned long *lvalp,
+					   unsigned int *valp,
+					   int write, void *data),
+			       void *data)
+{
+	size_t remaining = *lenp;
+	unsigned long out_val;
+	int rc;
+
+	if (conv(&out_val, tbl_data, 0, data)) {
+		rc = -EINVAL;
+	} else {
+		rc = proc_put_long(&buffer, &remaining, out_val, false);
+		/* Append the newline only if there is room left for it. */
+		if (!rc && remaining)
+			rc = proc_put_char(&buffer, &remaining, '\n');
+	}
+
+	/* The accounting below also runs on the error paths. */
+	*lenp -= remaining;
+	*ppos += *lenp;
+
+	return rc;
+}
+
+/* Shared unsigned int handler: sanity-check @table, then read or write. */
+static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
+			       int write, void __user *buffer,
+			       size_t *lenp, loff_t *ppos,
+			       int (*conv)(unsigned long *lvalp,
+					   unsigned int *valp,
+					   int write, void *data),
+			       void *data)
+{
+	unsigned int *i, vleft;
+
+	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
+		*lenp = 0;
+		return 0;
+	}
+
+	i = (unsigned int *) tbl_data;
+	vleft = table->maxlen / sizeof(*i);
+
+	/*
+	 * Arrays are not supported, keep this simple. *Do not* add
+	 * support for them.
+	 */
+	if (vleft != 1) {
+		*lenp = 0;
+		return -EINVAL;
+	}
+
+	if (!conv)
+		conv = do_proc_douintvec_conv;
+	if (write)
+		return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
+					   conv, data);
+	return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
+}
+
+static int do_proc_douintvec(struct ctl_table *table, int write,
+			     void __user *buffer, size_t *lenp, loff_t *ppos,
+			     int (*conv)(unsigned long *lvalp,
+					 unsigned int *valp,
+					 int write, void *data),
+			     void *data)
+{
+	return __do_proc_douintvec(table->data, table, write,
+				   buffer, lenp, ppos, conv, data);
+}
+
 /**
  * proc_dointvec - read a vector of integers
  * @table: the sysctl table
@@ -2284,8 +2466,8 @@ int proc_dointvec(struct ctl_table *table, int write,
 int proc_douintvec(struct ctl_table *table, int write,
 		     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	return do_proc_dointvec(table, write, buffer, lenp, ppos,
-				do_proc_douintvec_conv, NULL);
+	return do_proc_douintvec(table, write, buffer, lenp, ppos,
+				 do_proc_douintvec_conv, NULL);
 }
 
 /*
@@ -2390,6 +2572,65 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
 				do_proc_dointvec_minmax_conv, &param);
 }
 
+struct do_proc_douintvec_minmax_conv_param {
+	unsigned int *min;	/* lower bound, NULL for no lower bound */
+	unsigned int *max;	/* upper bound, NULL for no upper bound */
+};
+
+/* min/max-checking converter used by proc_douintvec_minmax(). */
+static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
+					 unsigned int *valp,
+					 int write, void *data)
+{
+	struct do_proc_douintvec_minmax_conv_param *param = data;
+
+	if (write) {
+		unsigned int val = *lvalp;
+
+		/* Overflow check first: val is already truncated here. */
+		if (*lvalp > UINT_MAX)
+			return -EINVAL;
+		if ((param->min && *param->min > val) ||
+		    (param->max && *param->max < val))
+			return -ERANGE;
+		*valp = val;
+	} else {
+		*lvalp = (unsigned long) *valp;
+	}
+
+	return 0;
+}
+
+/**
+ * proc_douintvec_minmax - read/write one unsigned int with min/max values
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes a single unsigned integer value from/to the user buffer,
+ * treated as an ASCII string. Arrays are not supported: table->maxlen must
+ * cover exactly one unsigned int. Negative strings are not allowed.
+ *
+ * This routine will ensure the value is within the range specified by
+ * table->extra1 (min) and table->extra2 (max). There is also a sanity
+ * check against UINT_MAX to avoid having to support wrap around uses from
+ * userspace.
+ *
+ * Returns 0 on success.
+ */
+int proc_douintvec_minmax(struct ctl_table *table, int write,
+			  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct do_proc_douintvec_minmax_conv_param param = {
+		.min = (unsigned int *) table->extra1,
+		.max = (unsigned int *) table->extra2,
+	};
+	return do_proc_douintvec(table, write, buffer, lenp, ppos,
+				 do_proc_douintvec_minmax_conv, &param);
+}
+
 static void validate_coredump_safety(void)
 {
 #ifdef CONFIG_COREDUMP
@@ -2447,17 +2688,8 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
 	left = *lenp;
 
 	if (write) {
-		if (*ppos) {
-			switch (sysctl_writes_strict) {
-			case SYSCTL_WRITES_STRICT:
-				goto out;
-			case SYSCTL_WRITES_WARN:
-				warn_sysctl_write(table);
-				break;
-			default:
-				break;
-			}
-		}
+		if (proc_first_pos_non_zero_ignore(ppos, table))
+			goto out;
 
 		if (left > PAGE_SIZE - 1)
 			left = PAGE_SIZE - 1;
@@ -2898,6 +3130,12 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
 	return -ENOSYS;
 }
 
+int proc_douintvec_minmax(struct ctl_table *table, int write,
+			  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
 int proc_dointvec_jiffies(struct ctl_table *table, int write,
 		    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -2940,6 +3178,7 @@ EXPORT_SYMBOL(proc_dointvec);
 EXPORT_SYMBOL(proc_douintvec);
 EXPORT_SYMBOL(proc_dointvec_jiffies);
 EXPORT_SYMBOL(proc_dointvec_minmax);
+EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
 EXPORT_SYMBOL(proc_dostring);

diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 939a158..02e1859 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c

@@ -1346,7 +1346,7 @@ static void deprecated_sysctl_warning(const int *name, int nlen)
 	 * CTL_KERN/KERN_VERSION is used by older glibc and cannot
 	 * ever go away.
 	 */
-	if (name[0] == CTL_KERN && name[1] == KERN_VERSION)
+	if (nlen >= 2 && name[0] == CTL_KERN && name[1] == KERN_VERSION)
 		return;
 
 	if (printk_ratelimit()) {

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2953d55..53f6b64 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c

@@ -3816,7 +3816,7 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod)
 	int exclude_mod = 0;
 	int found = 0;
 	int ret;
-	int clear_filter;
+	int clear_filter = 0;
 
 	if (func) {
 		func_g.type = filter_parse_regex(func, len, &func_g.search,
@@ -3950,7 +3950,7 @@ static int cache_mod(struct trace_array *tr,
 				continue;
 
 			/* no func matches all */
-			if (!func || strcmp(func, "*") == 0 ||
+			if (strcmp(func, "*") == 0 ||
 			    (ftrace_mod->func &&
 			     strcmp(ftrace_mod->func, func) == 0)) {
 				ret = 0;
@@ -3978,6 +3978,7 @@ static int
 ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
 		 int reset, int enable);
 
+#ifdef CONFIG_MODULES
 static void process_mod_list(struct list_head *head, struct ftrace_ops *ops,
 			     char *mod, bool enable)
 {
@@ -4068,6 +4069,7 @@ static void process_cached_mods(const char *mod_name)
 
 	kfree(mod);
 }
+#endif
 
 /*
  * We register the module command as a template to show others how

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 948ec32..2d0ffcc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c

@@ -1916,7 +1916,11 @@ static int trace_save_cmdline(struct task_struct *tsk)
 {
 	unsigned pid, idx;
 
-	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
+	/* treat recording of idle task as a success */
+	if (!tsk->pid)
+		return 1;
+
+	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
 		return 0;
 
 	/*
@@ -2002,7 +2006,11 @@ int trace_find_tgid(int pid)
 
 static int trace_save_tgid(struct task_struct *tsk)
 {
-	if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT))
+	/* treat recording of idle task as a success */
+	if (!tsk->pid)
+		return 1;
+
+	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
 		return 0;
 
 	tgid_map[tsk->pid] = tsk->tgid;
@@ -2029,11 +2037,20 @@ static bool tracing_record_taskinfo_skip(int flags)
  */
 void tracing_record_taskinfo(struct task_struct *task, int flags)
 {
+	bool done;
+
 	if (tracing_record_taskinfo_skip(flags))
 		return;
-	if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task))
-		return;
-	if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task))
+
+	/*
+	 * Record as much task information as possible. If some fail, continue
+	 * to try to record the others.
+	 */
+	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
+	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
+
+	/* If recording any information failed, retry again soon. */
+	if (!done)
 		return;
 
 	__this_cpu_write(trace_taskinfo_save, false);
@@ -2050,15 +2067,22 @@ void tracing_record_taskinfo(struct task_struct *task, int flags)
 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
 					  struct task_struct *next, int flags)
 {
+	bool done;
+
 	if (tracing_record_taskinfo_skip(flags))
 		return;
 
-	if ((flags & TRACE_RECORD_CMDLINE) &&
-	    (!trace_save_cmdline(prev) || !trace_save_cmdline(next)))
-		return;
+	/*
+	 * Record as much task information as possible. If some fail, continue
+	 * to try to record the others.
+	 */
+	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
+	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
+	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
+	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
 
-	if ((flags & TRACE_RECORD_TGID) &&
-	    (!trace_save_tgid(prev) || !trace_save_tgid(next)))
+	/* If recording any information failed, retry again soon. */
+	if (!done)
 		return;
 
 	__this_cpu_write(trace_taskinfo_save, false);
@@ -3334,14 +3358,23 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file
 				       unsigned int flags)
 {
 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
+	const char tgid_space[] = "          ";
+	const char space[] = "  ";
 
-	seq_printf(m, "#                          %s  _-----=> irqs-off\n",	    tgid ? "          " : "");
-	seq_printf(m, "#                          %s / _----=> need-resched\n",	    tgid ? "          " : "");
-	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",  tgid ? "          " : "");
-	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",    tgid ? "          " : "");
-	seq_printf(m, "#                          %s||| /     delay\n",		    tgid ? "          " : "");
-	seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
-	seq_printf(m, "#              | |       | %s||||       |         |\n",	    tgid ? "     |    " : "");
+	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
+		   tgid ? tgid_space : space);
+	seq_printf(m, "#                          %s / _----=> need-resched\n",
+		   tgid ? tgid_space : space);
+	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
+		   tgid ? tgid_space : space);
+	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
+		   tgid ? tgid_space : space);
+	seq_printf(m, "#                          %s||| /     delay\n",
+		   tgid ? tgid_space : space);
+	seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
+		   tgid ? "   TGID   " : space);
+	seq_printf(m, "#              | |       | %s||||       |         |\n",
+		   tgid ? "     |    " : space);
 }
 
 void
@@ -4689,6 +4722,76 @@ static const struct file_operations tracing_readme_fops = {
 	.llseek		= generic_file_llseek,
 };
 
+/*
+ * seq_file ->next handler for the "saved_tgids" file.
+ *
+ * @v points at an entry of tgid_map, where the index of an entry is the pid
+ * and the value stored there is the recorded tgid. The cursor is advanced
+ * past the current entry unless both *pos and m->count are still zero, *pos
+ * is incremented, and the next entry holding a non-zero tgid is returned.
+ * Returns NULL once the end of the map has been passed.
+ */
+static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	int *ptr = v;
+
+	if (*pos || m->count)
+		ptr++;
+
+	(*pos)++;
+
+	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
+		/*
+		 * *ptr already is the tgid recorded for this pid. It must
+		 * not be handed to trace_find_tgid(), which expects a pid
+		 * and would therefore test an unrelated map entry.
+		 */
+		if (*ptr)
+			return ptr;
+	}
+
+	return NULL;
+}
+
+/*
+ * seq_file ->start handler for the "saved_tgids" file.
+ *
+ * Replays the iteration from the first tgid_map entry by calling
+ * saved_tgids_next() until the local position has moved past *pos.
+ * Returns the entry reached, or NULL if there is no map or the iteration
+ * ran off the end.
+ */
+static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t cursor = 0;
+	void *entry;
+
+	if (!tgid_map)
+		return NULL;
+
+	for (entry = &tgid_map[0]; cursor <= *pos; ) {
+		entry = saved_tgids_next(m, entry, &cursor);
+		if (!entry)
+			break;
+	}
+
+	return entry;
+}
+
+/* seq_file ->stop handler for "saved_tgids"; intentionally empty. */
+static void saved_tgids_stop(struct seq_file *m, void *v)
+{
+}
+
+/*
+ * seq_file ->show handler: print one "<pid> <tgid>" line. The pid is the
+ * index of the tgid_map entry that @v points at.
+ */
+static int saved_tgids_show(struct seq_file *m, void *v)
+{
+	int *entry = v;
+	int pid = entry - tgid_map;
+	int tgid = trace_find_tgid(pid);
+
+	seq_printf(m, "%d %d\n", pid, tgid);
+	return 0;
+}
+
+/* seq_file iterator callbacks that walk tgid_map for "saved_tgids". */
+static const struct seq_operations tracing_saved_tgids_seq_ops = {
+	.start		= saved_tgids_start,
+	.stop		= saved_tgids_stop,
+	.next		= saved_tgids_next,
+	.show		= saved_tgids_show,
+};
+
+/*
+ * Open handler for "saved_tgids": fails with -ENODEV while tracing is
+ * disabled, otherwise sets up the seq_file iterator.
+ */
+static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
+{
+	return tracing_disabled ? -ENODEV :
+		seq_open(filp, &tracing_saved_tgids_seq_ops);
+}
+
+
+/* File operations for "saved_tgids": a seq_file opened via tracing_saved_tgids_open(). */
+static const struct file_operations tracing_saved_tgids_fops = {
+	.open		= tracing_saved_tgids_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	unsigned int *ptr = v;
@@ -7921,6 +8024,9 @@ static __init int tracer_init_tracefs(void)
 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
 			  NULL, &tracing_saved_cmdlines_size_fops);
 
+	trace_create_file("saved_tgids", 0444, d_tracer,
+			NULL, &tracing_saved_tgids_fops);
+
 	trace_eval_init();
 
 	trace_create_eval_file(d_tracer);

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 2c52218..c9b5aa1 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c

@@ -598,6 +598,14 @@ static struct notifier_block trace_kprobe_module_nb = {
 	.priority = 1	/* Invoked after kprobe module callback */
 };
 
+/*
+ * Convert certain expected symbols into '_' when generating event names.
+ *
+ * Every ':' and '.' in the NUL-terminated string @name is replaced in
+ * place, including one in the very first character.
+ */
+static inline void sanitize_event_name(char *name)
+{
+	/*
+	 * Check the current character before stepping past it; advancing
+	 * inside the loop condition would leave name[0] unchecked.
+	 */
+	for (; *name != '\0'; name++)
+		if (*name == ':' || *name == '.')
+			*name = '_';
+}
+
 static int create_trace_kprobe(int argc, char **argv)
 {
 	/*
@@ -736,6 +744,7 @@ static int create_trace_kprobe(int argc, char **argv)
 		else
 			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
 				 is_return ? 'r' : 'p', addr);
+		sanitize_event_name(buf);
 		event = buf;
 	}
 	tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,

diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index b4a751e..a4df67c 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c

@@ -406,6 +406,8 @@ static const struct file_operations stack_trace_fops = {
 	.release	= seq_release,
 };
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+
 static int
 stack_trace_filter_open(struct inode *inode, struct file *file)
 {
@@ -423,6 +425,8 @@ static const struct file_operations stack_trace_filter_fops = {
 	.release = ftrace_regex_release,
 };
 
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
 int
 stack_trace_sysctl(struct ctl_table *table, int write,
 		   void __user *buffer, size_t *lenp,
@@ -477,8 +481,10 @@ static __init int stack_trace_init(void)
 	trace_create_file("stack_trace", 0444, d_tracer,
 			NULL, &stack_trace_fops);
 
+#ifdef CONFIG_DYNAMIC_FTRACE
 	trace_create_file("stack_trace_filter", 0444, d_tracer,
 			  &trace_ops, &stack_trace_filter_fops);
+#endif
 
 	if (stack_trace_filter_buf[0])
 		ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 03e0b69..cabe3e9 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c

@@ -29,15 +29,58 @@
 #include <linux/kvm_para.h>
 #include <linux/kthread.h>
 
+/* Watchdog configuration */
 static DEFINE_MUTEX(watchdog_proc_mutex);
 
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
-unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
+int __read_mostly nmi_watchdog_enabled;
+
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED |
+						NMI_WATCHDOG_ENABLED;
 #else
 unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
 #endif
-int __read_mostly nmi_watchdog_enabled;
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+/* boot commands */
+/*
+ * Should we panic when a soft-lockup or hard-lockup occurs:
+ */
+unsigned int __read_mostly hardlockup_panic =
+			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+/*
+ * We may not want to enable hard lockup detection by default in all cases,
+ * for example when running the kernel as a guest on a hypervisor. In these
+ * cases this function can be called to disable hard lockup detection. This
+ * function should only be executed once by the boot processor before the
+ * kernel command line parameters are parsed, because otherwise it is not
+ * possible to override this in hardlockup_panic_setup().
+ */
+void hardlockup_detector_disable(void)
+{
+	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+}
+
+/*
+ * Parse the "nmi_watchdog=" boot parameter. Only the leading characters of
+ * @str are compared: "panic"/"nopanic" set or clear hardlockup_panic, a
+ * leading '0'/'1' clears or sets NMI_WATCHDOG_ENABLED in watchdog_enabled.
+ * Anything else is ignored. Always returns 1.
+ */
+static int __init hardlockup_panic_setup(char *str)
+{
+	if (strncmp(str, "panic", 5) == 0)
+		hardlockup_panic = 1;
+	else if (strncmp(str, "nopanic", 7) == 0)
+		hardlockup_panic = 0;
+	else if (str[0] == '0')
+		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+	else if (str[0] == '1')
+		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+
+	return 1;
+}
+__setup("nmi_watchdog=", hardlockup_panic_setup);
+
+#endif
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 int __read_mostly soft_watchdog_enabled;
+#endif
+
 int __read_mostly watchdog_user_enabled;
 int __read_mostly watchdog_thresh = 10;
 
@@ -45,15 +88,9 @@ int __read_mostly watchdog_thresh = 10;
 int __read_mostly sysctl_softlockup_all_cpu_backtrace;
 int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
 #endif
-static struct cpumask watchdog_cpumask __read_mostly;
+struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 
-/* Helper for online, unparked cpus. */
-#define for_each_watchdog_cpu(cpu) \
-	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
-
-atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
-
 /*
  * The 'watchdog_running' variable is set to 1 when the watchdog threads
  * are registered/started and is set to 0 when the watchdog threads are
@@ -72,7 +109,47 @@ static int __read_mostly watchdog_running;
  * of 'watchdog_running' cannot change while the watchdog is deactivated
  * temporarily (see related code in 'proc' handlers).
  */
-static int __read_mostly watchdog_suspended;
+int __read_mostly watchdog_suspended;
+
+/*
+ * These functions can be overridden if an architecture implements its
+ * own hardlockup detector.
+ *
+ * watchdog_nmi_enable/disable can be implemented to start and stop when
+ * softlockup watchdog threads start and stop. The arch must select the
+ * SOFTLOCKUP_DETECTOR Kconfig.
+ */
+/* Weak default: does nothing for @cpu and returns 0. */
+int __weak watchdog_nmi_enable(unsigned int cpu)
+{
+	return 0;
+}
+/* Weak default: does nothing for @cpu. */
+void __weak watchdog_nmi_disable(unsigned int cpu)
+{
+}
+
+/*
+ * watchdog_nmi_reconfigure can be implemented to be notified after any
+ * watchdog configuration change. The arch hardlockup watchdog should
+ * respond to the following variables:
+ * - nmi_watchdog_enabled
+ * - watchdog_thresh
+ * - watchdog_cpumask
+ * - sysctl_hardlockup_all_cpu_backtrace
+ * - hardlockup_panic
+ * - watchdog_suspended
+ *
+ * The weak default below is empty.
+ */
+void __weak watchdog_nmi_reconfigure(void)
+{
+}
+
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+
+/* Helper for online, unparked cpus. */
+#define for_each_watchdog_cpu(cpu) \
+	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
+
+atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
 
 static u64 __read_mostly sample_period;
 
@@ -120,6 +197,7 @@ static int __init softlockup_all_cpu_backtrace_setup(char *str)
 	return 1;
 }
 __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 static int __init hardlockup_all_cpu_backtrace_setup(char *str)
 {
 	sysctl_hardlockup_all_cpu_backtrace =
@@ -128,6 +206,7 @@ static int __init hardlockup_all_cpu_backtrace_setup(char *str)
 }
 __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
 #endif
+#endif
 
 /*
  * Hard-lockup warnings should be triggered after just a few seconds. Soft-
@@ -213,18 +292,6 @@ void touch_softlockup_watchdog_sync(void)
 	__this_cpu_write(watchdog_touch_ts, 0);
 }
 
-/* watchdog detector functions */
-bool is_hardlockup(void)
-{
-	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
-
-	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
-		return true;
-
-	__this_cpu_write(hrtimer_interrupts_saved, hrint);
-	return false;
-}
-
 static int is_softlockup(unsigned long touch_ts)
 {
 	unsigned long now = get_timestamp();
@@ -237,23 +304,23 @@ static int is_softlockup(unsigned long touch_ts)
 	return 0;
 }
 
+/*
+ * Watchdog detector function: compare the hrtimer_interrupts counter with
+ * the value saved by the previous check. An unchanged counter is reported
+ * as a hard lockup; otherwise the new value is saved for the next check.
+ */
+bool is_hardlockup(void)
+{
+	unsigned long current_count = __this_cpu_read(hrtimer_interrupts);
+	unsigned long saved_count = __this_cpu_read(hrtimer_interrupts_saved);
+
+	if (saved_count != current_count) {
+		__this_cpu_write(hrtimer_interrupts_saved, current_count);
+		return false;
+	}
+
+	return true;
+}
+
 static void watchdog_interrupt_count(void)
 {
 	__this_cpu_inc(hrtimer_interrupts);
 }
 
-/*
- * These two functions are mostly architecture specific
- * defining them as weak here.
- */
-int __weak watchdog_nmi_enable(unsigned int cpu)
-{
-	return 0;
-}
-void __weak watchdog_nmi_disable(unsigned int cpu)
-{
-}
-
 static int watchdog_enable_all_cpus(void);
 static void watchdog_disable_all_cpus(void);
 
@@ -502,57 +569,6 @@ static void watchdog_unpark_threads(void)
 		kthread_unpark(per_cpu(softlockup_watchdog, cpu));
 }
 
-/*
- * Suspend the hard and soft lockup detector by parking the watchdog threads.
- */
-int lockup_detector_suspend(void)
-{
-	int ret = 0;
-
-	get_online_cpus();
-	mutex_lock(&watchdog_proc_mutex);
-	/*
-	 * Multiple suspend requests can be active in parallel (counted by
-	 * the 'watchdog_suspended' variable). If the watchdog threads are
-	 * running, the first caller takes care that they will be parked.
-	 * The state of 'watchdog_running' cannot change while a suspend
-	 * request is active (see related code in 'proc' handlers).
-	 */
-	if (watchdog_running && !watchdog_suspended)
-		ret = watchdog_park_threads();
-
-	if (ret == 0)
-		watchdog_suspended++;
-	else {
-		watchdog_disable_all_cpus();
-		pr_err("Failed to suspend lockup detectors, disabled\n");
-		watchdog_enabled = 0;
-	}
-
-	mutex_unlock(&watchdog_proc_mutex);
-
-	return ret;
-}
-
-/*
- * Resume the hard and soft lockup detector by unparking the watchdog threads.
- */
-void lockup_detector_resume(void)
-{
-	mutex_lock(&watchdog_proc_mutex);
-
-	watchdog_suspended--;
-	/*
-	 * The watchdog threads are unparked if they were previously running
-	 * and if there is no more active suspend request.
-	 */
-	if (watchdog_running && !watchdog_suspended)
-		watchdog_unpark_threads();
-
-	mutex_unlock(&watchdog_proc_mutex);
-	put_online_cpus();
-}
-
 static int update_watchdog_all_cpus(void)
 {
 	int ret;
@@ -605,6 +621,100 @@ static void watchdog_disable_all_cpus(void)
 }
 
 #ifdef CONFIG_SYSCTL
+/*
+ * Apply watchdog_cpumask to the watchdog_threads per-cpu threads and return
+ * the result of smpboot_update_cpumask_percpu_thread().
+ */
+static int watchdog_update_cpus(void)
+{
+	return smpboot_update_cpumask_percpu_thread(
+		    &watchdog_threads, &watchdog_cpumask);
+}
+#endif
+
+#else /* SOFTLOCKUP */
+/*
+ * No-op replacements for the helpers of the same name, provided in the
+ * "#else" (SOFTLOCKUP) branch - presumably built when
+ * CONFIG_SOFTLOCKUP_DETECTOR is not set; confirm against the matching
+ * #ifdef. The int-returning ones report success (0).
+ */
+static int watchdog_park_threads(void)
+{
+	return 0;
+}
+
+static void watchdog_unpark_threads(void)
+{
+}
+
+static int watchdog_enable_all_cpus(void)
+{
+	return 0;
+}
+
+static void watchdog_disable_all_cpus(void)
+{
+}
+
+#ifdef CONFIG_SYSCTL
+static int watchdog_update_cpus(void)
+{
+	return 0;
+}
+#endif
+
+static void set_sample_period(void)
+{
+}
+#endif /* SOFTLOCKUP */
+
+/*
+ * Suspend the hard and soft lockup detector by parking the watchdog threads.
+ *
+ * Returns 0 on success, otherwise the error from watchdog_park_threads(),
+ * in which case the detectors are disabled and watchdog_enabled is cleared.
+ *
+ * NOTE(review): get_online_cpus() taken here is only paired with the
+ * put_online_cpus() in lockup_detector_resume(); nothing in this function
+ * releases it on the failure path - confirm how callers handle an error.
+ */
+int lockup_detector_suspend(void)
+{
+	int ret = 0;
+
+	get_online_cpus();
+	mutex_lock(&watchdog_proc_mutex);
+	/*
+	 * Multiple suspend requests can be active in parallel (counted by
+	 * the 'watchdog_suspended' variable). If the watchdog threads are
+	 * running, the first caller takes care that they will be parked.
+	 * The state of 'watchdog_running' cannot change while a suspend
+	 * request is active (see related code in 'proc' handlers).
+	 */
+	if (watchdog_running && !watchdog_suspended)
+		ret = watchdog_park_threads();
+
+	if (ret == 0)
+		watchdog_suspended++;
+	else {
+		watchdog_disable_all_cpus();
+		pr_err("Failed to suspend lockup detectors, disabled\n");
+		watchdog_enabled = 0;
+	}
+
+	/* Notify an arch hardlockup detector of the changed state. */
+	watchdog_nmi_reconfigure();
+
+	mutex_unlock(&watchdog_proc_mutex);
+
+	return ret;
+}
+
+/*
+ * Resume the hard and soft lockup detector by unparking the watchdog threads.
+ *
+ * Counterpart of lockup_detector_suspend(): it drops one suspend request
+ * from 'watchdog_suspended' and calls put_online_cpus(), mirroring the
+ * increment and get_online_cpus() done there.
+ */
+void lockup_detector_resume(void)
+{
+	mutex_lock(&watchdog_proc_mutex);
+
+	watchdog_suspended--;
+	/*
+	 * The watchdog threads are unparked if they were previously running
+	 * and if there is no more active suspend request.
+	 */
+	if (watchdog_running && !watchdog_suspended)
+		watchdog_unpark_threads();
+
+	/* Notify an arch hardlockup detector of the changed state. */
+	watchdog_nmi_reconfigure();
+
+	mutex_unlock(&watchdog_proc_mutex);
+	put_online_cpus();
+}
+
+#ifdef CONFIG_SYSCTL
 
 /*
  * Update the run state of the lockup detectors.
@@ -625,6 +735,8 @@ static int proc_watchdog_update(void)
 	else
 		watchdog_disable_all_cpus();
 
+	watchdog_nmi_reconfigure();
+
 	return err;
 
 }
@@ -810,10 +922,11 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
 			 * a temporary cpumask, so we are likely not in a
 			 * position to do much else to make things better.
 			 */
-			if (smpboot_update_cpumask_percpu_thread(
-				    &watchdog_threads, &watchdog_cpumask) != 0)
+			if (watchdog_update_cpus() != 0)
 				pr_err("cpumask update failed\n");
 		}
+
+		watchdog_nmi_reconfigure();
 	}
 out:
 	mutex_unlock(&watchdog_proc_mutex);

diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 54a427d..295a0d8 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c

@@ -22,41 +22,9 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 
-/* boot commands */
-/*
- * Should we panic when a soft-lockup or hard-lockup occurs:
- */
-unsigned int __read_mostly hardlockup_panic =
-			CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
 static unsigned long hardlockup_allcpu_dumped;
-/*
- * We may not want to enable hard lockup detection by default in all cases,
- * for example when running the kernel as a guest on a hypervisor. In these
- * cases this function can be called to disable hard lockup detection. This
- * function should only be executed once by the boot processor before the
- * kernel command line parameters are parsed, because otherwise it is not
- * possible to override this in hardlockup_panic_setup().
- */
-void hardlockup_detector_disable(void)
-{
-	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-}
 
-static int __init hardlockup_panic_setup(char *str)
-{
-	if (!strncmp(str, "panic", 5))
-		hardlockup_panic = 1;
-	else if (!strncmp(str, "nopanic", 7))
-		hardlockup_panic = 0;
-	else if (!strncmp(str, "0", 1))
-		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-	else if (!strncmp(str, "1", 1))
-		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
-	return 1;
-}
-__setup("nmi_watchdog=", hardlockup_panic_setup);
-
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
 {
 	/*
 	 * Using __raw here because some code paths have
@@ -66,9 +34,8 @@ void touch_nmi_watchdog(void)
 	 * going off.
 	 */
 	raw_cpu_write(watchdog_nmi_touch, true);
-	touch_softlockup_watchdog();
 }
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
 static struct perf_event_attr wd_hw_attr = {
 	.type		= PERF_TYPE_HARDWARE,

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ca9460f..b0d01c6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug

@@ -778,34 +778,45 @@
 menu "Debug Lockups and Hangs"
 
 config LOCKUP_DETECTOR
-	bool "Detect Hard and Soft Lockups"
+	bool
+
+config SOFTLOCKUP_DETECTOR
+	bool "Detect Soft Lockups"
 	depends on DEBUG_KERNEL && !S390
+	select LOCKUP_DETECTOR
 	help
 	  Say Y here to enable the kernel to act as a watchdog to detect
-	  hard and soft lockups.
+	  soft lockups.
 
 	  Softlockups are bugs that cause the kernel to loop in kernel
 	  mode for more than 20 seconds, without giving other tasks a
 	  chance to run.  The current stack trace is displayed upon
 	  detection and the system will stay locked up.
 
+config HARDLOCKUP_DETECTOR_PERF
+	bool
+	select SOFTLOCKUP_DETECTOR
+
+#
+# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard
+# lockup detector rather than the perf based detector.
+#
+config HARDLOCKUP_DETECTOR
+	bool "Detect Hard Lockups"
+	depends on DEBUG_KERNEL && !S390
+	depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH
+	select LOCKUP_DETECTOR
+	select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF
+	select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH
+	help
+	  Say Y here to enable the kernel to act as a watchdog to detect
+	  hard lockups.
+
 	  Hardlockups are bugs that cause the CPU to loop in kernel mode
 	  for more than 10 seconds, without letting other interrupts have a
 	  chance to run.  The current stack trace is displayed upon detection
 	  and the system will stay locked up.
 
-	  The overhead should be minimal.  A periodic hrtimer runs to
-	  generate interrupts and kick the watchdog task every 4 seconds.
-	  An NMI is generated every 10 seconds or so to check for hardlockups.
-
-	  The frequency of hrtimer and NMI events and the soft and hard lockup
-	  thresholds can be controlled through the sysctl watchdog_thresh.
-
-config HARDLOCKUP_DETECTOR
-	def_bool y
-	depends on LOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG
-	depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
-
 config BOOTPARAM_HARDLOCKUP_PANIC
 	bool "Panic (Reboot) On Hard Lockups"
 	depends on HARDLOCKUP_DETECTOR
@@ -826,7 +837,7 @@
 
 config BOOTPARAM_SOFTLOCKUP_PANIC
 	bool "Panic (Reboot) On Soft Lockups"
-	depends on LOCKUP_DETECTOR
+	depends on SOFTLOCKUP_DETECTOR
 	help
 	  Say Y here to enable the kernel to panic on "soft lockups",
 	  which are bugs that cause the kernel to loop in kernel
@@ -843,7 +854,7 @@
 
 config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
 	int
-	depends on LOCKUP_DETECTOR
+	depends on SOFTLOCKUP_DETECTOR
 	range 0 1
 	default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
 	default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
@@ -851,7 +862,7 @@
 config DETECT_HUNG_TASK
 	bool "Detect Hung Tasks"
 	depends on DEBUG_KERNEL
-	default LOCKUP_DETECTOR
+	default SOFTLOCKUP_DETECTOR
 	help
 	  Say Y here to enable the kernel to detect "hung tasks",
 	  which are bugs that cause the task to be stuck in
@@ -1594,7 +1605,7 @@
 
 config INTERVAL_TREE_TEST
 	tristate "Interval tree test"
-	depends on m && DEBUG_KERNEL
+	depends on DEBUG_KERNEL
 	select INTERVAL_TREE
 	help
 	  A benchmark measuring the performance of the interval tree library
@@ -1785,6 +1796,17 @@
 
 	  If unsure, say N.
 
+config TEST_SYSCTL
+	tristate "sysctl test driver"
+	default n
+	depends on PROC_SYSCTL
+	help
+	  This builds the "test_sysctl" module. This driver makes it possible to
+	  safely test the proc sysctl interfaces available to drivers without
+	  affecting production knobs which might alter system functionality.
+
+	  If unsure, say N.
+
 config TEST_UDELAY
 	tristate "udelay test driver"
 	default n

diff --git a/lib/Makefile b/lib/Makefile
index 5a00832..85e91e5 100644
--- a/lib/Makefile
+++ b/lib/Makefile

@@ -46,6 +46,7 @@
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_BPF) += test_bpf.o
 obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
+obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
 obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
 obj-$(CONFIG_TEST_KASAN) += test_kasan.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o

diff --git a/lib/bitmap.c b/lib/bitmap.c
index 08c6ef3..9a53280 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c

@@ -251,7 +251,7 @@ int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
 }
 EXPORT_SYMBOL(__bitmap_weight);
 
-void bitmap_set(unsigned long *map, unsigned int start, int len)
+void __bitmap_set(unsigned long *map, unsigned int start, int len)
 {
 	unsigned long *p = map + BIT_WORD(start);
 	const unsigned int size = start + len;
@@ -270,9 +270,9 @@ void bitmap_set(unsigned long *map, unsigned int start, int len)
 		*p |= mask_to_set;
 	}
 }
-EXPORT_SYMBOL(bitmap_set);
+EXPORT_SYMBOL(__bitmap_set);
 
-void bitmap_clear(unsigned long *map, unsigned int start, int len)
+void __bitmap_clear(unsigned long *map, unsigned int start, int len)
 {
 	unsigned long *p = map + BIT_WORD(start);
 	const unsigned int size = start + len;
@@ -291,7 +291,7 @@ void bitmap_clear(unsigned long *map, unsigned int start, int len)
 		*p &= ~mask_to_clear;
 	}
 }
-EXPORT_SYMBOL(bitmap_clear);
+EXPORT_SYMBOL(__bitmap_clear);
 
 /**
  * bitmap_find_next_zero_area_off - find a contiguous aligned zero area

diff --git a/lib/bsearch.c b/lib/bsearch.c
index e33c179..18b445b 100644
--- a/lib/bsearch.c
+++ b/lib/bsearch.c

@@ -33,19 +33,21 @@
 void *bsearch(const void *key, const void *base, size_t num, size_t size,
 	      int (*cmp)(const void *key, const void *elt))
 {
-	size_t start = 0, end = num;
+	const char *pivot;
 	int result;
 
-	while (start < end) {
-		size_t mid = start + (end - start) / 2;
+	while (num > 0) {
+		pivot = base + (num >> 1) * size;
+		result = cmp(key, pivot);
 
-		result = cmp(key, base + mid * size);
-		if (result < 0)
-			end = mid;
-		else if (result > 0)
-			start = mid + 1;
-		else
-			return (void *)base + mid * size;
+		if (result == 0)
+			return (void *)pivot;
+
+		if (result > 0) {
+			base = pivot + size;
+			num--;
+		}
+		num >>= 1;
 	}
 
 	return NULL;

diff --git a/lib/extable.c b/lib/extable.c
index 62968da..f54996fd 100644
--- a/lib/extable.c
+++ b/lib/extable.c

@@ -9,6 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/bsearch.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sort.h>
@@ -51,7 +52,7 @@ static void swap_ex(void *a, void *b, int size)
  * This is used both for the kernel exception table and for
  * the exception tables of modules that get loaded.
  */
-static int cmp_ex(const void *a, const void *b)
+static int cmp_ex_sort(const void *a, const void *b)
 {
 	const struct exception_table_entry *x = a, *y = b;
 
@@ -67,7 +68,7 @@ void sort_extable(struct exception_table_entry *start,
 		  struct exception_table_entry *finish)
 {
 	sort(start, finish - start, sizeof(struct exception_table_entry),
-	     cmp_ex, swap_ex);
+	     cmp_ex_sort, swap_ex);
 }
 
 #ifdef CONFIG_MODULES
@@ -93,6 +94,20 @@ void trim_init_extable(struct module *m)
 #endif /* !ARCH_HAS_SORT_EXTABLE */
 
 #ifndef ARCH_HAS_SEARCH_EXTABLE
+
+/*
+ * bsearch() comparison callback for search_extable(): @key points at the
+ * instruction address being looked up, @elt at an exception table entry.
+ * Returns 1, -1 or 0 when the address is above, below or equal to the
+ * entry's instruction address.
+ */
+static int cmp_ex_search(const void *key, const void *elt)
+{
+	const struct exception_table_entry *entry = elt;
+	unsigned long addr = *(unsigned long *)key;
+	unsigned long insn = ex_to_insn(entry);
+
+	/* compare explicitly to avoid overflow */
+	if (addr == insn)
+		return 0;
+
+	return addr > insn ? 1 : -1;
+}
+
 /*
  * Search one exception table for an entry corresponding to the
  * given instruction address, and return the address of the entry,
@@ -101,25 +116,11 @@ void trim_init_extable(struct module *m)
  * already sorted.
  */
 const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-	       const struct exception_table_entry *last,
+search_extable(const struct exception_table_entry *base,
+	       const size_t num,
 	       unsigned long value)
 {
-	while (first <= last) {
-		const struct exception_table_entry *mid;
-
-		mid = ((last - first) >> 1) + first;
-		/*
-		 * careful, the distance between value and insn
-		 * can be larger than MAX_LONG:
-		 */
-		if (ex_to_insn(mid) < value)
-			first = mid + 1;
-		else if (ex_to_insn(mid) > value)
-			last = mid - 1;
-		else
-			return mid;
-	}
-	return NULL;
+	return bsearch(&value, base, num,
+		       sizeof(struct exception_table_entry), cmp_ex_search);
 }
 #endif

diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 4ff1571..09ac73c1 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c

@@ -107,6 +107,12 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
 
 bool should_fail(struct fault_attr *attr, ssize_t size)
 {
+	if (in_task() && current->fail_nth) {
+		if (--current->fail_nth == 0)
+			goto fail;
+		return false;
+	}
+
 	/* No need to check any other properties if the probability is 0 */
 	if (attr->probability == 0)
 		return false;
@@ -134,6 +140,7 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
 	if (!fail_stacktrace(attr))
 		return false;
 
+fail:
 	fail_dump(attr);
 
 	if (atomic_read(&attr->times) != -1)

diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c
index 245900b..df495fe 100644
--- a/lib/interval_tree_test.c
+++ b/lib/interval_tree_test.c

@@ -1,27 +1,38 @@
 #include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/interval_tree.h>
 #include <linux/random.h>
+#include <linux/slab.h>
 #include <asm/timex.h>
 
-#define NODES        100
-#define PERF_LOOPS   100000
-#define SEARCHES     100
-#define SEARCH_LOOPS 10000
+#define __param(type, name, init, msg)		\
+	static type name = init;		\
+	module_param(name, type, 0444);		\
+	MODULE_PARM_DESC(name, msg);
+
+__param(int, nnodes, 100, "Number of nodes in the interval tree");
+__param(int, perf_loops, 100000, "Number of iterations modifying the tree");
+
+__param(int, nsearches, 100, "Number of searches to the interval tree");
+__param(int, search_loops, 10000, "Number of iterations searching the tree");
+__param(bool, search_all, false, "Searches will iterate all nodes in the tree");
+
+__param(uint, max_endpoint, ~0, "Largest value for the interval's endpoint");
 
 static struct rb_root root = RB_ROOT;
-static struct interval_tree_node nodes[NODES];
-static u32 queries[SEARCHES];
+static struct interval_tree_node *nodes = NULL;
+static u32 *queries = NULL;
 
 static struct rnd_state rnd;
 
 static inline unsigned long
-search(unsigned long query, struct rb_root *root)
+search(struct rb_root *root, unsigned long start, unsigned long last)
 {
 	struct interval_tree_node *node;
 	unsigned long results = 0;
 
-	for (node = interval_tree_iter_first(root, query, query); node;
-	     node = interval_tree_iter_next(node, query, query))
+	for (node = interval_tree_iter_first(root, start, last); node;
+	     node = interval_tree_iter_next(node, start, last))
 		results++;
 	return results;
 }
@@ -29,19 +40,22 @@ search(unsigned long query, struct rb_root *root)
 static void init(void)
 {
 	int i;
-	for (i = 0; i < NODES; i++) {
-		u32 a = prandom_u32_state(&rnd);
-		u32 b = prandom_u32_state(&rnd);
-		if (a <= b) {
-			nodes[i].start = a;
-			nodes[i].last = b;
-		} else {
-			nodes[i].start = b;
-			nodes[i].last = a;
-		}
+
+	for (i = 0; i < nnodes; i++) {
+		u32 b = (prandom_u32_state(&rnd) >> 4) % max_endpoint;
+		u32 a = (prandom_u32_state(&rnd) >> 4) % b;
+
+		nodes[i].start = a;
+		nodes[i].last = b;
 	}
-	for (i = 0; i < SEARCHES; i++)
-		queries[i] = prandom_u32_state(&rnd);
+
+	/*
+	 * Limit the search scope to what the user defined.
+	 * Otherwise we are merely measuring empty walks,
+	 * which is pointless.
+	 */
+	for (i = 0; i < nsearches; i++)
+		queries[i] = (prandom_u32_state(&rnd) >> 4) % max_endpoint;
 }
 
 static int interval_tree_test_init(void)
@@ -50,6 +64,16 @@ static int interval_tree_test_init(void)
 	unsigned long results;
 	cycles_t time1, time2, time;
 
+	nodes = kmalloc(nnodes * sizeof(struct interval_tree_node), GFP_KERNEL);
+	if (!nodes)
+		return -ENOMEM;
+
+	queries = kmalloc(nsearches * sizeof(int), GFP_KERNEL);
+	if (!queries) {
+		kfree(nodes);
+		return -ENOMEM;
+	}
+
 	printk(KERN_ALERT "interval tree insert/remove");
 
 	prandom_seed_state(&rnd, 3141592653589793238ULL);
@@ -57,39 +81,46 @@ static int interval_tree_test_init(void)
 
 	time1 = get_cycles();
 
-	for (i = 0; i < PERF_LOOPS; i++) {
-		for (j = 0; j < NODES; j++)
+	for (i = 0; i < perf_loops; i++) {
+		for (j = 0; j < nnodes; j++)
 			interval_tree_insert(nodes + j, &root);
-		for (j = 0; j < NODES; j++)
+		for (j = 0; j < nnodes; j++)
 			interval_tree_remove(nodes + j, &root);
 	}
 
 	time2 = get_cycles();
 	time = time2 - time1;
 
-	time = div_u64(time, PERF_LOOPS);
+	time = div_u64(time, perf_loops);
 	printk(" -> %llu cycles\n", (unsigned long long)time);
 
 	printk(KERN_ALERT "interval tree search");
 
-	for (j = 0; j < NODES; j++)
+	for (j = 0; j < nnodes; j++)
 		interval_tree_insert(nodes + j, &root);
 
 	time1 = get_cycles();
 
 	results = 0;
-	for (i = 0; i < SEARCH_LOOPS; i++)
-		for (j = 0; j < SEARCHES; j++)
-			results += search(queries[j], &root);
+	for (i = 0; i < search_loops; i++)
+		for (j = 0; j < nsearches; j++) {
+			unsigned long start = search_all ? 0 : queries[j];
+			unsigned long last = search_all ? max_endpoint : queries[j];
+
+			results += search(&root, start, last);
+		}
 
 	time2 = get_cycles();
 	time = time2 - time1;
 
-	time = div_u64(time, SEARCH_LOOPS);
-	results = div_u64(results, SEARCH_LOOPS);
+	time = div_u64(time, search_loops);
+	results = div_u64(results, search_loops);
 	printk(" -> %llu cycles (%lu results)\n",
 	       (unsigned long long)time, results);
 
+	kfree(queries);
+	kfree(nodes);
+
 	return -EAGAIN; /* Fail will directly unload the module */
 }
 

diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index bf85e05..7201440 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c

@@ -51,13 +51,15 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
 
 	res = 0;
 	rv = 0;
-	while (*s) {
+	while (1) {
+		unsigned int c = *s;
+		unsigned int lc = c | 0x20; /* don't tolower() this line */
 		unsigned int val;
 
-		if ('0' <= *s && *s <= '9')
-			val = *s - '0';
-		else if ('a' <= _tolower(*s) && _tolower(*s) <= 'f')
-			val = _tolower(*s) - 'a' + 10;
+		if ('0' <= c && c <= '9')
+			val = c - '0';
+		else if ('a' <= lc && lc <= 'f')
+			val = lc - 'a' + 10;
 		else
 			break;
 

diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 8ee7e5e..3bf4a99 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c

@@ -72,6 +72,13 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
 }
 EXPORT_SYMBOL(percpu_counter_set);
 
+/**
+ * This function is both preempt and irq safe. The former is due to explicit
+ * preemption disable. The latter is guaranteed by the fact that the slow path
+ * is explicitly protected by an irq-safe spinlock whereas the fast path uses
+ * this_cpu_add which is irq-safe by definition. Hence there is no need to muck
+ * with irq state before calling this one.
+ */
 void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
 {
 	s64 count;

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index d9e7274..42466c1 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c

@@ -211,11 +211,10 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 	int i;
 
 	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
-	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
-	    gfp != GFP_KERNEL)
+	if (gfp != GFP_KERNEL)
 		tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
-	if (tbl == NULL && gfp == GFP_KERNEL)
-		tbl = vzalloc(size);
+	else
+		tbl = kvzalloc(size, gfp);
 
 	size = nbuckets;
 

diff --git a/lib/string.c b/lib/string.c
index 1c1fc91..ebbb99c 100644
--- a/lib/string.c
+++ b/lib/string.c

@@ -978,3 +978,10 @@ char *strreplace(char *s, char old, char new)
 	return s;
 }
 EXPORT_SYMBOL(strreplace);
+
+void fortify_panic(const char *name)
+{
+	pr_emerg("detected buffer overflow in %s\n", name);
+	BUG();
+}
+EXPORT_SYMBOL(fortify_panic);

diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index e2cbd43..2526a29 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c

@@ -333,10 +333,39 @@ static void __init test_bitmap_u32_array_conversions(void)
 	}
 }
 
+static void noinline __init test_mem_optimisations(void)
+{
+	DECLARE_BITMAP(bmap1, 1024);
+	DECLARE_BITMAP(bmap2, 1024);
+	unsigned int start, nbits;
+
+	for (start = 0; start < 1024; start += 8) {
+		memset(bmap1, 0x5a, sizeof(bmap1));
+		memset(bmap2, 0x5a, sizeof(bmap2));
+		for (nbits = 0; nbits < 1024 - start; nbits += 8) {
+			bitmap_set(bmap1, start, nbits);
+			__bitmap_set(bmap2, start, nbits);
+			if (!bitmap_equal(bmap1, bmap2, 1024))
+				printk("set not equal %d %d\n", start, nbits);
+			if (!__bitmap_equal(bmap1, bmap2, 1024))
+				printk("set not __equal %d %d\n", start, nbits);
+
+			bitmap_clear(bmap1, start, nbits);
+			__bitmap_clear(bmap2, start, nbits);
+			if (!bitmap_equal(bmap1, bmap2, 1024))
+				printk("clear not equal %d %d\n", start, nbits);
+			if (!__bitmap_equal(bmap1, bmap2, 1024))
+				printk("clear not __equal %d %d\n", start,
+									nbits);
+		}
+	}
+}
+
 static int __init test_bitmap_init(void)
 {
 	test_zero_fill_copy();
 	test_bitmap_u32_array_conversions();
+	test_mem_optimisations();
 
 	if (failed_tests == 0)
 		pr_info("all %u tests passed\n", total_tests);

diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
new file mode 100644
index 0000000..3dd801c
--- /dev/null
+++ b/lib/test_sysctl.c

@@ -0,0 +1,148 @@
+/*
+ * proc sysctl test driver
+ *
+ * Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or at your option any
+ * later version; or, when distributed separately from the Linux kernel or
+ * when incorporated into other software packages, subject to the following
+ * license:
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of copyleft-next (version 0.3.1 or later) as published
+ * at http://copyleft-next.org/.
+ */
+
+/*
+ * This module provides an interface to the proc sysctl interfaces.  This
+ * driver requires CONFIG_PROC_SYSCTL. It will not normally be loaded by the
+ * system unless explicitly requested by name. You can also build this driver
+ * into your kernel.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/async.h>
+#include <linux/delay.h>
+#include <linux/vmalloc.h>
+
+static int i_zero;
+static int i_one_hundred = 100;
+
+struct test_sysctl_data {
+	int int_0001;
+	int int_0002;
+	int int_0003[4];
+
+	unsigned int uint_0001;
+
+	char string_0001[65];
+};
+
+static struct test_sysctl_data test_data = {
+	.int_0001 = 60,
+	.int_0002 = 1,
+
+	.int_0003[0] = 0,
+	.int_0003[1] = 1,
+	.int_0003[2] = 2,
+	.int_0003[3] = 3,
+
+	.uint_0001 = 314,
+
+	.string_0001 = "(none)",
+};
+
+/* These are all under /proc/sys/debug/test_sysctl/ */
+static struct ctl_table test_table[] = {
+	{
+		.procname	= "int_0001",
+		.data		= &test_data.int_0001,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &i_zero,
+		.extra2         = &i_one_hundred,
+	},
+	{
+		.procname	= "int_0002",
+		.data		= &test_data.int_0002,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "int_0003",
+		.data		= &test_data.int_0003,
+		.maxlen		= sizeof(test_data.int_0003),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "uint_0001",
+		.data		= &test_data.uint_0001,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec,
+	},
+	{
+		.procname	= "string_0001",
+		.data		= &test_data.string_0001,
+		.maxlen		= sizeof(test_data.string_0001),
+		.mode		= 0644,
+		.proc_handler	= proc_dostring,
+	},
+	{ }
+};
+
+static struct ctl_table test_sysctl_table[] = {
+	{
+		.procname	= "test_sysctl",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= test_table,
+	},
+	{ }
+};
+
+static struct ctl_table test_sysctl_root_table[] = {
+	{
+		.procname	= "debug",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= test_sysctl_table,
+	},
+	{ }
+};
+
+static struct ctl_table_header *test_sysctl_header;
+
+static int __init test_sysctl_init(void)
+{
+	test_sysctl_header = register_sysctl_table(test_sysctl_root_table);
+	if (!test_sysctl_header)
+		return -ENOMEM;
+	return 0;
+}
+late_initcall(test_sysctl_init);
+
+static void __exit test_sysctl_exit(void)
+{
+	if (test_sysctl_header)
+		unregister_sysctl_table(test_sysctl_header);
+}
+
+module_exit(test_sysctl_exit);
+
+MODULE_AUTHOR("Luis R. Rodriguez <mcgrof@kernel.org>");
+MODULE_LICENSE("GPL");

diff --git a/mm/Kconfig b/mm/Kconfig
index 46ef77d..48b1af4 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig

@@ -161,7 +161,6 @@
 	bool "Allow for memory hot-add"
 	depends on SPARSEMEM || X86_64_ACPI_NUMA
 	depends on ARCH_ENABLE_MEMORY_HOTPLUG
-	depends on COMPILE_TEST || !KASAN
 
 config MEMORY_HOTPLUG_SPARSE
 	def_bool y

diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index da91df5..9075aa5 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c

@@ -24,7 +24,7 @@ struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info)
 {
 	unsigned long flags;
 	struct page *page = alloc_page(balloon_mapping_gfp_mask() |
-					__GFP_NOMEMALLOC | __GFP_NORETRY);
+				__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_ZERO);
 	if (!page)
 		return NULL;
 

diff --git a/mm/cma.c b/mm/cma.c
index 978b4a1..c0da318 100644
--- a/mm/cma.c
+++ b/mm/cma.c

@@ -59,7 +59,7 @@ const char *cma_get_name(const struct cma *cma)
 }
 
 static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
-					     int align_order)
+					     unsigned int align_order)
 {
 	if (align_order <= cma->order_per_bit)
 		return 0;
@@ -67,17 +67,14 @@ static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
 }
 
 /*
- * Find a PFN aligned to the specified order and return an offset represented in
- * order_per_bits.
+ * Find the offset of the base PFN from the specified align_order.
+ * The value returned is represented in order_per_bits.
  */
 static unsigned long cma_bitmap_aligned_offset(const struct cma *cma,
-					       int align_order)
+					       unsigned int align_order)
 {
-	if (align_order <= cma->order_per_bit)
-		return 0;
-
-	return (ALIGN(cma->base_pfn, (1UL << align_order))
-		- cma->base_pfn) >> cma->order_per_bit;
+	return (cma->base_pfn & ((1UL << align_order) - 1))
+		>> cma->order_per_bit;
 }
 
 static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma,
@@ -127,7 +124,7 @@ static int __init cma_activate_area(struct cma *cma)
 			 * to be in the same zone.
 			 */
 			if (page_zone(pfn_to_page(pfn)) != zone)
-				goto err;
+				goto not_in_zone;
 		}
 		init_cma_reserved_pageblock(pfn_to_page(base_pfn));
 	} while (--i);
@@ -141,7 +138,8 @@ static int __init cma_activate_area(struct cma *cma)
 
 	return 0;
 
-err:
+not_in_zone:
+	pr_err("CMA area %s could not be activated\n", cma->name);
 	kfree(cma->bitmap);
 	cma->count = 0;
 	return -EINVAL;

diff --git a/mm/filemap.c b/mm/filemap.c
index 3247b42..a497024 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c

@@ -239,14 +239,16 @@ void __delete_from_page_cache(struct page *page, void *shadow)
 	/* Leave page->index set: truncation lookup relies upon it */
 
 	/* hugetlb pages do not participate in page cache accounting. */
-	if (!PageHuge(page))
-		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
+	if (PageHuge(page))
+		return;
+
+	__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
 	if (PageSwapBacked(page)) {
 		__mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
 		if (PageTransHuge(page))
 			__dec_node_page_state(page, NR_SHMEM_THPS);
 	} else {
-		VM_BUG_ON_PAGE(PageTransHuge(page) && !PageHuge(page), page);
+		VM_BUG_ON_PAGE(PageTransHuge(page), page);
 	}
 
 	/*

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1a88006..bc48ee7 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c

@@ -20,9 +20,9 @@
 #include <linux/slab.h>
 #include <linux/sched/signal.h>
 #include <linux/rmap.h>
+#include <linux/string_helpers.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
-#include <linux/page-isolation.h>
 #include <linux/jhash.h>
 
 #include <asm/page.h>
@@ -872,7 +872,7 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
 	struct page *page;
 
 	list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
-		if (!is_migrate_isolate_page(page))
+		if (!PageHWPoison(page))
 			break;
 	/*
 	 * if 'non-isolated free hugepage' not found on the list,
@@ -887,19 +887,39 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
 	return page;
 }
 
-static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
+static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask, int nid,
+		nodemask_t *nmask)
 {
-	struct page *page;
-	int node;
+	unsigned int cpuset_mems_cookie;
+	struct zonelist *zonelist;
+	struct zone *zone;
+	struct zoneref *z;
+	int node = -1;
 
-	if (nid != NUMA_NO_NODE)
-		return dequeue_huge_page_node_exact(h, nid);
+	zonelist = node_zonelist(nid, gfp_mask);
 
-	for_each_online_node(node) {
+retry_cpuset:
+	cpuset_mems_cookie = read_mems_allowed_begin();
+	for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nmask) {
+		struct page *page;
+
+		if (!cpuset_zone_allowed(zone, gfp_mask))
+			continue;
+		/*
+		 * no need to ask again on the same node. Pool is node rather than
+		 * zone aware
+		 */
+		if (zone_to_nid(zone) == node)
+			continue;
+		node = zone_to_nid(zone);
+
 		page = dequeue_huge_page_node_exact(h, node);
 		if (page)
 			return page;
 	}
+	if (unlikely(read_mems_allowed_retry(cpuset_mems_cookie)))
+		goto retry_cpuset;
+
 	return NULL;
 }
 
@@ -917,15 +937,11 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 				unsigned long address, int avoid_reserve,
 				long chg)
 {
-	struct page *page = NULL;
+	struct page *page;
 	struct mempolicy *mpol;
-	nodemask_t *nodemask;
 	gfp_t gfp_mask;
+	nodemask_t *nodemask;
 	int nid;
-	struct zonelist *zonelist;
-	struct zone *zone;
-	struct zoneref *z;
-	unsigned int cpuset_mems_cookie;
 
 	/*
 	 * A child process with MAP_PRIVATE mappings created by their parent
@@ -940,32 +956,15 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 	if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0)
 		goto err;
 
-retry_cpuset:
-	cpuset_mems_cookie = read_mems_allowed_begin();
 	gfp_mask = htlb_alloc_mask(h);
 	nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
-	zonelist = node_zonelist(nid, gfp_mask);
-
-	for_each_zone_zonelist_nodemask(zone, z, zonelist,
-						MAX_NR_ZONES - 1, nodemask) {
-		if (cpuset_zone_allowed(zone, gfp_mask)) {
-			page = dequeue_huge_page_node(h, zone_to_nid(zone));
-			if (page) {
-				if (avoid_reserve)
-					break;
-				if (!vma_has_reserves(vma, chg))
-					break;
-
-				SetPagePrivate(page);
-				h->resv_huge_pages--;
-				break;
-			}
-		}
+	page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+	if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
+		SetPagePrivate(page);
+		h->resv_huge_pages--;
 	}
 
 	mpol_cond_put(mpol);
-	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
-		goto retry_cpuset;
 	return page;
 
 err:
@@ -1385,7 +1384,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 
 	page = __alloc_pages_node(nid,
 		htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
-						__GFP_REPEAT|__GFP_NOWARN,
+						__GFP_RETRY_MAYFAIL|__GFP_NOWARN,
 		huge_page_order(h));
 	if (page) {
 		prep_new_huge_page(h, page, nid);
@@ -1460,7 +1459,7 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
  * number of free hugepages would be reduced below the number of reserved
  * hugepages.
  */
-static int dissolve_free_huge_page(struct page *page)
+int dissolve_free_huge_page(struct page *page)
 {
 	int rc = 0;
 
@@ -1473,6 +1472,14 @@ static int dissolve_free_huge_page(struct page *page)
 			rc = -EBUSY;
 			goto out;
 		}
+		/*
+		 * Move PageHWPoison flag from head page to the raw error page,
+		 * which makes any subpages other than the error page reusable.
+		 */
+		if (PageHWPoison(head) && page != head) {
+			SetPageHWPoison(page);
+			ClearPageHWPoison(head);
+		}
 		list_del(&head->lru);
 		h->free_huge_pages--;
 		h->free_huge_pages_node[nid]--;
@@ -1513,82 +1520,19 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 	return rc;
 }
 
-/*
- * There are 3 ways this can get called:
- * 1. With vma+addr: we use the VMA's memory policy
- * 2. With !vma, but nid=NUMA_NO_NODE:  We try to allocate a huge
- *    page from any node, and let the buddy allocator itself figure
- *    it out.
- * 3. With !vma, but nid!=NUMA_NO_NODE.  We allocate a huge page
- *    strictly from 'nid'
- */
 static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h,
-		struct vm_area_struct *vma, unsigned long addr, int nid)
+		gfp_t gfp_mask, int nid, nodemask_t *nmask)
 {
 	int order = huge_page_order(h);
-	gfp_t gfp = htlb_alloc_mask(h)|__GFP_COMP|__GFP_REPEAT|__GFP_NOWARN;
-	unsigned int cpuset_mems_cookie;
 
-	/*
-	 * We need a VMA to get a memory policy.  If we do not
-	 * have one, we use the 'nid' argument.
-	 *
-	 * The mempolicy stuff below has some non-inlined bits
-	 * and calls ->vm_ops.  That makes it hard to optimize at
-	 * compile-time, even when NUMA is off and it does
-	 * nothing.  This helps the compiler optimize it out.
-	 */
-	if (!IS_ENABLED(CONFIG_NUMA) || !vma) {
-		/*
-		 * If a specific node is requested, make sure to
-		 * get memory from there, but only when a node
-		 * is explicitly specified.
-		 */
-		if (nid != NUMA_NO_NODE)
-			gfp |= __GFP_THISNODE;
-		/*
-		 * Make sure to call something that can handle
-		 * nid=NUMA_NO_NODE
-		 */
-		return alloc_pages_node(nid, gfp, order);
-	}
-
-	/*
-	 * OK, so we have a VMA.  Fetch the mempolicy and try to
-	 * allocate a huge page with it.  We will only reach this
-	 * when CONFIG_NUMA=y.
-	 */
-	do {
-		struct page *page;
-		struct mempolicy *mpol;
-		int nid;
-		nodemask_t *nodemask;
-
-		cpuset_mems_cookie = read_mems_allowed_begin();
-		nid = huge_node(vma, addr, gfp, &mpol, &nodemask);
-		mpol_cond_put(mpol);
-		page = __alloc_pages_nodemask(gfp, order, nid, nodemask);
-		if (page)
-			return page;
-	} while (read_mems_allowed_retry(cpuset_mems_cookie));
-
-	return NULL;
+	gfp_mask |= __GFP_COMP|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
+	if (nid == NUMA_NO_NODE)
+		nid = numa_mem_id();
+	return __alloc_pages_nodemask(gfp_mask, order, nid, nmask);
 }
 
-/*
- * There are two ways to allocate a huge page:
- * 1. When you have a VMA and an address (like a fault)
- * 2. When you have no VMA (like when setting /proc/.../nr_hugepages)
- *
- * 'vma' and 'addr' are only for (1).  'nid' is always NUMA_NO_NODE in
- * this case which signifies that the allocation should be done with
- * respect for the VMA's memory policy.
- *
- * For (2), we ignore 'vma' and 'addr' and use 'nid' exclusively. This
- * implies that memory policies will not be taken in to account.
- */
-static struct page *__alloc_buddy_huge_page(struct hstate *h,
-		struct vm_area_struct *vma, unsigned long addr, int nid)
+static struct page *__alloc_buddy_huge_page(struct hstate *h, gfp_t gfp_mask,
+		int nid, nodemask_t *nmask)
 {
 	struct page *page;
 	unsigned int r_nid;
@@ -1597,15 +1541,6 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
 		return NULL;
 
 	/*
-	 * Make sure that anyone specifying 'nid' is not also specifying a VMA.
-	 * This makes sure the caller is picking _one_ of the modes with which
-	 * we can call this function, not both.
-	 */
-	if (vma || (addr != -1)) {
-		VM_WARN_ON_ONCE(addr == -1);
-		VM_WARN_ON_ONCE(nid != NUMA_NO_NODE);
-	}
-	/*
 	 * Assume we will successfully allocate the surplus page to
 	 * prevent racing processes from causing the surplus to exceed
 	 * overcommit
@@ -1638,7 +1573,7 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
 	}
 	spin_unlock(&hugetlb_lock);
 
-	page = __hugetlb_alloc_buddy_huge_page(h, vma, addr, nid);
+	page = __hugetlb_alloc_buddy_huge_page(h, gfp_mask, nid, nmask);
 
 	spin_lock(&hugetlb_lock);
 	if (page) {
@@ -1663,26 +1598,23 @@ static struct page *__alloc_buddy_huge_page(struct hstate *h,
 }
 
 /*
- * Allocate a huge page from 'nid'.  Note, 'nid' may be
- * NUMA_NO_NODE, which means that it may be allocated
- * anywhere.
- */
-static
-struct page *__alloc_buddy_huge_page_no_mpol(struct hstate *h, int nid)
-{
-	unsigned long addr = -1;
-
-	return __alloc_buddy_huge_page(h, NULL, addr, nid);
-}
-
-/*
  * Use the VMA's mpolicy to allocate a huge page from the buddy.
  */
 static
 struct page *__alloc_buddy_huge_page_with_mpol(struct hstate *h,
 		struct vm_area_struct *vma, unsigned long addr)
 {
-	return __alloc_buddy_huge_page(h, vma, addr, NUMA_NO_NODE);
+	struct page *page;
+	struct mempolicy *mpol;
+	gfp_t gfp_mask = htlb_alloc_mask(h);
+	int nid;
+	nodemask_t *nodemask;
+
+	nid = huge_node(vma, addr, gfp_mask, &mpol, &nodemask);
+	page = __alloc_buddy_huge_page(h, gfp_mask, nid, nodemask);
+	mpol_cond_put(mpol);
+
+	return page;
 }
 
 /*
@@ -1692,19 +1624,46 @@ struct page *__alloc_buddy_huge_page_with_mpol(struct hstate *h,
  */
 struct page *alloc_huge_page_node(struct hstate *h, int nid)
 {
+	gfp_t gfp_mask = htlb_alloc_mask(h);
 	struct page *page = NULL;
 
+	if (nid != NUMA_NO_NODE)
+		gfp_mask |= __GFP_THISNODE;
+
 	spin_lock(&hugetlb_lock);
 	if (h->free_huge_pages - h->resv_huge_pages > 0)
-		page = dequeue_huge_page_node(h, nid);
+		page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL);
 	spin_unlock(&hugetlb_lock);
 
 	if (!page)
-		page = __alloc_buddy_huge_page_no_mpol(h, nid);
+		page = __alloc_buddy_huge_page(h, gfp_mask, nid, NULL);
 
 	return page;
 }
 
+
+struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
+		nodemask_t *nmask)
+{
+	gfp_t gfp_mask = htlb_alloc_mask(h);
+
+	spin_lock(&hugetlb_lock);
+	if (h->free_huge_pages - h->resv_huge_pages > 0) {
+		struct page *page;
+
+		page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
+		if (page) {
+			spin_unlock(&hugetlb_lock);
+			return page;
+		}
+	}
+	spin_unlock(&hugetlb_lock);
+
+	/* No reservations, try to overcommit */
+
+	return __alloc_buddy_huge_page(h, gfp_mask, preferred_nid, nmask);
+}
+
 /*
  * Increase the hugetlb pool such that it can accommodate a reservation
  * of size 'delta'.
@@ -1730,12 +1689,14 @@ static int gather_surplus_pages(struct hstate *h, int delta)
 retry:
 	spin_unlock(&hugetlb_lock);
 	for (i = 0; i < needed; i++) {
-		page = __alloc_buddy_huge_page_no_mpol(h, NUMA_NO_NODE);
+		page = __alloc_buddy_huge_page(h, htlb_alloc_mask(h),
+				NUMA_NO_NODE, NULL);
 		if (!page) {
 			alloc_ok = false;
 			break;
 		}
 		list_add(&page->lru, &surplus_list);
+		cond_resched();
 	}
 	allocated += i;
 
@@ -2204,8 +2165,16 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 		} else if (!alloc_fresh_huge_page(h,
 					 &node_states[N_MEMORY]))
 			break;
+		cond_resched();
 	}
-	h->max_huge_pages = i;
+	if (i < h->max_huge_pages) {
+		char buf[32];
+
+		string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
+		pr_warn("HugeTLB: allocating %lu of page size %s failed.  Only allocated %lu hugepages.\n",
+			h->max_huge_pages, buf, i);
+		h->max_huge_pages = i;
+	}
 }
 
 static void __init hugetlb_init_hstates(void)
@@ -2223,26 +2192,16 @@ static void __init hugetlb_init_hstates(void)
 	VM_BUG_ON(minimum_order == UINT_MAX);
 }
 
-static char * __init memfmt(char *buf, unsigned long n)
-{
-	if (n >= (1UL << 30))
-		sprintf(buf, "%lu GB", n >> 30);
-	else if (n >= (1UL << 20))
-		sprintf(buf, "%lu MB", n >> 20);
-	else
-		sprintf(buf, "%lu KB", n >> 10);
-	return buf;
-}
-
 static void __init report_hugepages(void)
 {
 	struct hstate *h;
 
 	for_each_hstate(h) {
 		char buf[32];
+
+		string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
 		pr_info("HugeTLB registered %s page size, pre-allocated %ld pages\n",
-			memfmt(buf, huge_page_size(h)),
-			h->free_huge_pages);
+			buf, h->free_huge_pages);
 	}
 }
 
@@ -2801,6 +2760,11 @@ static int __init hugetlb_init(void)
 		return 0;
 
 	if (!size_to_hstate(default_hstate_size)) {
+		if (default_hstate_size != 0) {
+			pr_err("HugeTLB: unsupported default_hugepagesz %lu. Reverting to %lu\n",
+			       default_hstate_size, HPAGE_SIZE);
+		}
+
 		default_hstate_size = HPAGE_SIZE;
 		if (!size_to_hstate(default_hstate_size))
 			hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
@@ -4739,40 +4703,6 @@ follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int fla
 	return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
 }
 
-#ifdef CONFIG_MEMORY_FAILURE
-
-/*
- * This function is called from memory failure code.
- */
-int dequeue_hwpoisoned_huge_page(struct page *hpage)
-{
-	struct hstate *h = page_hstate(hpage);
-	int nid = page_to_nid(hpage);
-	int ret = -EBUSY;
-
-	spin_lock(&hugetlb_lock);
-	/*
-	 * Just checking !page_huge_active is not enough, because that could be
-	 * an isolated/hwpoisoned hugepage (which have >0 refcount).
-	 */
-	if (!page_huge_active(hpage) && !page_count(hpage)) {
-		/*
-		 * Hwpoisoned hugepage isn't linked to activelist or freelist,
-		 * but dangling hpage->lru can trigger list-debug warnings
-		 * (this happens when we call unpoison_memory() on it),
-		 * so let it point to itself with list_del_init().
-		 */
-		list_del_init(&hpage->lru);
-		set_page_refcounted(hpage);
-		h->free_huge_pages--;
-		h->free_huge_pages_node[nid]--;
-		ret = 0;
-	}
-	spin_unlock(&hugetlb_lock);
-	return ret;
-}
-#endif
-
 bool isolate_huge_page(struct page *page, struct list_head *list)
 {
 	bool ret = true;

diff --git a/mm/internal.h b/mm/internal.h
index 0e4f558..24d88f0 100644
--- a/mm/internal.h
+++ b/mm/internal.h

@@ -23,7 +23,7 @@
  * hints such as HIGHMEM usage.
  */
 #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
-			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
+			__GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\
 			__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\
 			__GFP_ATOMIC)
 

diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index c81549d..ca11bc4 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c

@@ -134,97 +134,33 @@ static __always_inline bool memory_is_poisoned_1(unsigned long addr)
 	return false;
 }
 
-static __always_inline bool memory_is_poisoned_2(unsigned long addr)
+static __always_inline bool memory_is_poisoned_2_4_8(unsigned long addr,
+						unsigned long size)
 {
-	u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr);
+	u8 *shadow_addr = (u8 *)kasan_mem_to_shadow((void *)addr);
 
-	if (unlikely(*shadow_addr)) {
-		if (memory_is_poisoned_1(addr + 1))
-			return true;
+	/*
+	 * Access crosses 8(shadow size)-byte boundary. Such access maps
+	 * into 2 shadow bytes, so we need to check them both.
+	 */
+	if (unlikely(((addr + size - 1) & KASAN_SHADOW_MASK) < size - 1))
+		return *shadow_addr || memory_is_poisoned_1(addr + size - 1);
 
-		/*
-		 * If single shadow byte covers 2-byte access, we don't
-		 * need to do anything more. Otherwise, test the first
-		 * shadow byte.
-		 */
-		if (likely(((addr + 1) & KASAN_SHADOW_MASK) != 0))
-			return false;
-
-		return unlikely(*(u8 *)shadow_addr);
-	}
-
-	return false;
-}
-
-static __always_inline bool memory_is_poisoned_4(unsigned long addr)
-{
-	u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr);
-
-	if (unlikely(*shadow_addr)) {
-		if (memory_is_poisoned_1(addr + 3))
-			return true;
-
-		/*
-		 * If single shadow byte covers 4-byte access, we don't
-		 * need to do anything more. Otherwise, test the first
-		 * shadow byte.
-		 */
-		if (likely(((addr + 3) & KASAN_SHADOW_MASK) >= 3))
-			return false;
-
-		return unlikely(*(u8 *)shadow_addr);
-	}
-
-	return false;
-}
-
-static __always_inline bool memory_is_poisoned_8(unsigned long addr)
-{
-	u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr);
-
-	if (unlikely(*shadow_addr)) {
-		if (memory_is_poisoned_1(addr + 7))
-			return true;
-
-		/*
-		 * If single shadow byte covers 8-byte access, we don't
-		 * need to do anything more. Otherwise, test the first
-		 * shadow byte.
-		 */
-		if (likely(IS_ALIGNED(addr, KASAN_SHADOW_SCALE_SIZE)))
-			return false;
-
-		return unlikely(*(u8 *)shadow_addr);
-	}
-
-	return false;
+	return memory_is_poisoned_1(addr + size - 1);
 }
 
 static __always_inline bool memory_is_poisoned_16(unsigned long addr)
 {
-	u32 *shadow_addr = (u32 *)kasan_mem_to_shadow((void *)addr);
+	u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr);
 
-	if (unlikely(*shadow_addr)) {
-		u16 shadow_first_bytes = *(u16 *)shadow_addr;
+	/* Unaligned 16-bytes access maps into 3 shadow bytes. */
+	if (unlikely(!IS_ALIGNED(addr, KASAN_SHADOW_SCALE_SIZE)))
+		return *shadow_addr || memory_is_poisoned_1(addr + 15);
 
-		if (unlikely(shadow_first_bytes))
-			return true;
-
-		/*
-		 * If two shadow bytes covers 16-byte access, we don't
-		 * need to do anything more. Otherwise, test the last
-		 * shadow byte.
-		 */
-		if (likely(IS_ALIGNED(addr, KASAN_SHADOW_SCALE_SIZE)))
-			return false;
-
-		return memory_is_poisoned_1(addr + 15);
-	}
-
-	return false;
+	return *shadow_addr;
 }
 
-static __always_inline unsigned long bytes_is_zero(const u8 *start,
+static __always_inline unsigned long bytes_is_nonzero(const u8 *start,
 					size_t size)
 {
 	while (size) {
@@ -237,7 +173,7 @@ static __always_inline unsigned long bytes_is_zero(const u8 *start,
 	return 0;
 }
 
-static __always_inline unsigned long memory_is_zero(const void *start,
+static __always_inline unsigned long memory_is_nonzero(const void *start,
 						const void *end)
 {
 	unsigned int words;
@@ -245,11 +181,11 @@ static __always_inline unsigned long memory_is_zero(const void *start,
 	unsigned int prefix = (unsigned long)start % 8;
 
 	if (end - start <= 16)
-		return bytes_is_zero(start, end - start);
+		return bytes_is_nonzero(start, end - start);
 
 	if (prefix) {
 		prefix = 8 - prefix;
-		ret = bytes_is_zero(start, prefix);
+		ret = bytes_is_nonzero(start, prefix);
 		if (unlikely(ret))
 			return ret;
 		start += prefix;
@@ -258,12 +194,12 @@ static __always_inline unsigned long memory_is_zero(const void *start,
 	words = (end - start) / 8;
 	while (words) {
 		if (unlikely(*(u64 *)start))
-			return bytes_is_zero(start, 8);
+			return bytes_is_nonzero(start, 8);
 		start += 8;
 		words--;
 	}
 
-	return bytes_is_zero(start, (end - start) % 8);
+	return bytes_is_nonzero(start, (end - start) % 8);
 }
 
 static __always_inline bool memory_is_poisoned_n(unsigned long addr,
@@ -271,7 +207,7 @@ static __always_inline bool memory_is_poisoned_n(unsigned long addr,
 {
 	unsigned long ret;
 
-	ret = memory_is_zero(kasan_mem_to_shadow((void *)addr),
+	ret = memory_is_nonzero(kasan_mem_to_shadow((void *)addr),
 			kasan_mem_to_shadow((void *)addr + size - 1) + 1);
 
 	if (unlikely(ret)) {
@@ -292,11 +228,9 @@ static __always_inline bool memory_is_poisoned(unsigned long addr, size_t size)
 		case 1:
 			return memory_is_poisoned_1(addr);
 		case 2:
-			return memory_is_poisoned_2(addr);
 		case 4:
-			return memory_is_poisoned_4(addr);
 		case 8:
-			return memory_is_poisoned_8(addr);
+			return memory_is_poisoned_2_4_8(addr, size);
 		case 16:
 			return memory_is_poisoned_16(addr);
 		default:
@@ -803,17 +737,47 @@ void __asan_unpoison_stack_memory(const void *addr, size_t size)
 EXPORT_SYMBOL(__asan_unpoison_stack_memory);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-static int kasan_mem_notifier(struct notifier_block *nb,
+static int __meminit kasan_mem_notifier(struct notifier_block *nb,
 			unsigned long action, void *data)
 {
-	return (action == MEM_GOING_ONLINE) ? NOTIFY_BAD : NOTIFY_OK;
+	struct memory_notify *mem_data = data;
+	unsigned long nr_shadow_pages, start_kaddr, shadow_start;
+	unsigned long shadow_end, shadow_size;
+
+	nr_shadow_pages = mem_data->nr_pages >> KASAN_SHADOW_SCALE_SHIFT;
+	start_kaddr = (unsigned long)pfn_to_kaddr(mem_data->start_pfn);
+	shadow_start = (unsigned long)kasan_mem_to_shadow((void *)start_kaddr);
+	shadow_size = nr_shadow_pages << PAGE_SHIFT;
+	shadow_end = shadow_start + shadow_size;
+
+	if (WARN_ON(mem_data->nr_pages % KASAN_SHADOW_SCALE_SIZE) ||
+		WARN_ON(start_kaddr % (KASAN_SHADOW_SCALE_SIZE << PAGE_SHIFT)))
+		return NOTIFY_BAD;
+
+	switch (action) {
+	case MEM_GOING_ONLINE: {
+		void *ret;
+
+		ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start,
+					shadow_end, GFP_KERNEL,
+					PAGE_KERNEL, VM_NO_GUARD,
+					pfn_to_nid(mem_data->start_pfn),
+					__builtin_return_address(0));
+		if (!ret)
+			return NOTIFY_BAD;
+
+		kmemleak_ignore(ret);
+		return NOTIFY_OK;
+	}
+	case MEM_OFFLINE:
+		vfree((void *)shadow_start);
+	}
+
+	return NOTIFY_OK;
 }
 
 static int __init kasan_memhotplug_init(void)
 {
-	pr_info("WARNING: KASAN doesn't support memory hot-add\n");
-	pr_info("Memory hot-add will be disabled\n");
-
 	hotplug_memory_notifier(kasan_mem_notifier, 0);
 
 	return 0;

diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index b96a5f7..554e4c0 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c

@@ -118,6 +118,18 @@ static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
 
 	do {
 		next = p4d_addr_end(addr, end);
+		if (IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) {
+			pud_t *pud;
+			pmd_t *pmd;
+
+			p4d_populate(&init_mm, p4d, lm_alias(kasan_zero_pud));
+			pud = pud_offset(p4d, addr);
+			pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd));
+			pmd = pmd_offset(pud, addr);
+			pmd_populate_kernel(&init_mm, pmd,
+						lm_alias(kasan_zero_pte));
+			continue;
+		}
 
 		if (p4d_none(*p4d)) {
 			p4d_populate(&init_mm, p4d,

diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index beee0e9..04bb1d3 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c

@@ -107,7 +107,7 @@ static const char *get_shadow_bug_type(struct kasan_access_info *info)
 	return bug_type;
 }
 
-const char *get_wild_bug_type(struct kasan_access_info *info)
+static const char *get_wild_bug_type(struct kasan_access_info *info)
 {
 	const char *bug_type = "unknown-crash";
 

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index df4ebdb..c01f177 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c

@@ -816,7 +816,8 @@ khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
 static bool hugepage_vma_check(struct vm_area_struct *vma)
 {
 	if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
-	    (vma->vm_flags & VM_NOHUGEPAGE))
+	    (vma->vm_flags & VM_NOHUGEPAGE) ||
+	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
 		return false;
 	if (shmem_file(vma->vm_file)) {
 		if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))

diff --git a/mm/list_lru.c b/mm/list_lru.c
index 234676e..7a40fa2b 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c

@@ -117,6 +117,7 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item)
 		l = list_lru_from_kmem(nlru, item);
 		list_add_tail(item, &l->list);
 		l->nr_items++;
+		nlru->nr_items++;
 		spin_unlock(&nlru->lock);
 		return true;
 	}
@@ -136,6 +137,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
 		l = list_lru_from_kmem(nlru, item);
 		list_del_init(item);
 		l->nr_items--;
+		nlru->nr_items--;
 		spin_unlock(&nlru->lock);
 		return true;
 	}
@@ -183,15 +185,10 @@ EXPORT_SYMBOL_GPL(list_lru_count_one);
 
 unsigned long list_lru_count_node(struct list_lru *lru, int nid)
 {
-	long count = 0;
-	int memcg_idx;
+	struct list_lru_node *nlru;
 
-	count += __list_lru_count_one(lru, nid, -1);
-	if (list_lru_memcg_aware(lru)) {
-		for_each_memcg_cache_index(memcg_idx)
-			count += __list_lru_count_one(lru, nid, memcg_idx);
-	}
-	return count;
+	nlru = &lru->node[nid];
+	return nlru->nr_items;
 }
 EXPORT_SYMBOL_GPL(list_lru_count_node);
 
@@ -226,6 +223,7 @@ __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
 			assert_spin_locked(&nlru->lock);
 		case LRU_REMOVED:
 			isolated++;
+			nlru->nr_items--;
 			/*
 			 * If the lru lock has been dropped, our list
 			 * traversal is now invalid and so we have to

diff --git a/mm/madvise.c b/mm/madvise.c
index 25b78ee..9976852 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c

@@ -205,7 +205,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
 			continue;
 
 		page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
-								vma, index);
+							vma, index, false);
 		if (page)
 			put_page(page);
 	}
@@ -246,7 +246,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
 		}
 		swap = radix_to_swp_entry(page);
 		page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
-								NULL, 0);
+							NULL, 0, false);
 		if (page)
 			put_page(page);
 	}
@@ -451,9 +451,6 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
 	struct mm_struct *mm = vma->vm_mm;
 	struct mmu_gather tlb;
 
-	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
-		return -EINVAL;
-
 	/* MADV_FREE works for only anon vma at the moment */
 	if (!vma_is_anonymous(vma))
 		return -EINVAL;
@@ -477,14 +474,6 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
 	return 0;
 }
 
-static long madvise_free(struct vm_area_struct *vma,
-			     struct vm_area_struct **prev,
-			     unsigned long start, unsigned long end)
-{
-	*prev = vma;
-	return madvise_free_single_vma(vma, start, end);
-}
-
 /*
  * Application no longer needs these pages.  If the pages are dirty,
  * it's OK to just throw them away.  The app will be more careful about
@@ -504,9 +493,17 @@ static long madvise_free(struct vm_area_struct *vma,
  * An interface that causes the system to free clean pages and flush
  * dirty pages is already available as msync(MS_INVALIDATE).
  */
-static long madvise_dontneed(struct vm_area_struct *vma,
-			     struct vm_area_struct **prev,
-			     unsigned long start, unsigned long end)
+static long madvise_dontneed_single_vma(struct vm_area_struct *vma,
+					unsigned long start, unsigned long end)
+{
+	zap_page_range(vma, start, end - start);
+	return 0;
+}
+
+static long madvise_dontneed_free(struct vm_area_struct *vma,
+				  struct vm_area_struct **prev,
+				  unsigned long start, unsigned long end,
+				  int behavior)
 {
 	*prev = vma;
 	if (!can_madv_dontneed_vma(vma))
@@ -526,7 +523,8 @@ static long madvise_dontneed(struct vm_area_struct *vma,
 			 * is also < vma->vm_end. If start <
 			 * vma->vm_start it means an hole materialized
 			 * in the user address space within the
-			 * virtual range passed to MADV_DONTNEED.
+			 * virtual range passed to MADV_DONTNEED
+			 * or MADV_FREE.
 			 */
 			return -ENOMEM;
 		}
@@ -537,7 +535,7 @@ static long madvise_dontneed(struct vm_area_struct *vma,
 			 * Don't fail if end > vma->vm_end. If the old
 			 * vma was splitted while the mmap_sem was
 			 * released the effect of the concurrent
-			 * operation may not cause MADV_DONTNEED to
+			 * operation may not cause madvise() to
 			 * have an undefined result. There may be an
 			 * adjacent next vma that we'll walk
 			 * next. userfaultfd_remove() will generate an
@@ -549,8 +547,13 @@ static long madvise_dontneed(struct vm_area_struct *vma,
 		}
 		VM_WARN_ON(start >= end);
 	}
-	zap_page_range(vma, start, end - start);
-	return 0;
+
+	if (behavior == MADV_DONTNEED)
+		return madvise_dontneed_single_vma(vma, start, end);
+	else if (behavior == MADV_FREE)
+		return madvise_free_single_vma(vma, start, end);
+	else
+		return -EINVAL;
 }
 
 /*
@@ -656,9 +659,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	case MADV_WILLNEED:
 		return madvise_willneed(vma, prev, start, end);
 	case MADV_FREE:
-		return madvise_free(vma, prev, start, end);
 	case MADV_DONTNEED:
-		return madvise_dontneed(vma, prev, start, end);
+		return madvise_dontneed_free(vma, prev, start, end, behavior);
 	default:
 		return madvise_behavior(vma, prev, start, end, behavior);
 	}

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 425aa0c..3df3c04 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c

@@ -631,7 +631,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 	val = __this_cpu_read(memcg->stat->nr_page_events);
 	next = __this_cpu_read(memcg->stat->targets[target]);
 	/* from time_after() in jiffies.h */
-	if ((long)next - (long)val < 0) {
+	if ((long)(next - val) < 0) {
 		switch (target) {
 		case MEM_CGROUP_TARGET_THRESH:
 			next = val + THRESHOLDS_EVENTS_TARGET;
@@ -5317,38 +5317,52 @@ struct cgroup_subsys memory_cgrp_subsys = {
 
 /**
  * mem_cgroup_low - check if memory consumption is below the normal range
- * @root: the highest ancestor to consider
+ * @root: the top ancestor of the sub-tree being checked
  * @memcg: the memory cgroup to check
  *
  * Returns %true if memory consumption of @memcg, and that of all
- * configurable ancestors up to @root, is below the normal range.
+ * ancestors up to (but not including) @root, is below the normal range.
+ *
+ * @root is exclusive; it is never low when looked at directly and isn't
+ * checked when traversing the hierarchy.
+ *
+ * Excluding @root enables using memory.low to prioritize memory usage
+ * between cgroups within a subtree of the hierarchy that is limited by
+ * memory.high or memory.max.
+ *
+ * For example, given cgroup A with children B and C:
+ *
+ *    A
+ *   / \
+ *  B   C
+ *
+ * and
+ *
+ *  1. A/memory.current > A/memory.high
+ *  2. A/B/memory.current < A/B/memory.low
+ *  3. A/C/memory.current >= A/C/memory.low
+ *
+ * As 'A' is high, i.e. triggers reclaim from 'A', and 'B' is low, we
+ * should reclaim from 'C' until 'A' is no longer high or until we can
+ * no longer reclaim from 'C'.  If 'A', i.e. @root, isn't excluded by
+ * mem_cgroup_low when reclaiming from 'A', then 'B' won't be considered
+ * low and we will reclaim indiscriminately from both 'B' and 'C'.
  */
 bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg)
 {
 	if (mem_cgroup_disabled())
 		return false;
 
-	/*
-	 * The toplevel group doesn't have a configurable range, so
-	 * it's never low when looked at directly, and it is not
-	 * considered an ancestor when assessing the hierarchy.
-	 */
-
-	if (memcg == root_mem_cgroup)
+	if (!root)
+		root = root_mem_cgroup;
+	if (memcg == root)
 		return false;
 
-	if (page_counter_read(&memcg->memory) >= memcg->low)
-		return false;
-
-	while (memcg != root) {
-		memcg = parent_mem_cgroup(memcg);
-
-		if (memcg == root_mem_cgroup)
-			break;
-
+	for (; memcg != root; memcg = parent_mem_cgroup(memcg)) {
 		if (page_counter_read(&memcg->memory) >= memcg->low)
 			return false;
 	}
+
 	return true;
 }
 

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index dbe3e50..1cd3b35 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c

@@ -49,7 +49,6 @@
 #include <linux/swap.h>
 #include <linux/backing-dev.h>
 #include <linux/migrate.h>
-#include <linux/page-isolation.h>
 #include <linux/suspend.h>
 #include <linux/slab.h>
 #include <linux/swapops.h>
@@ -555,6 +554,39 @@ static int delete_from_lru_cache(struct page *p)
 	return -EIO;
 }
 
+static int truncate_error_page(struct page *p, unsigned long pfn,
+				struct address_space *mapping)
+{
+	int ret = MF_FAILED;
+
+	if (mapping->a_ops->error_remove_page) {
+		int err = mapping->a_ops->error_remove_page(mapping, p);
+
+		if (err != 0) {
+			pr_info("Memory failure: %#lx: Failed to punch page: %d\n",
+				pfn, err);
+		} else if (page_has_private(p) &&
+			   !try_to_release_page(p, GFP_NOIO)) {
+			pr_info("Memory failure: %#lx: failed to release buffers\n",
+				pfn);
+		} else {
+			ret = MF_RECOVERED;
+		}
+	} else {
+		/*
+		 * If the file system doesn't support it just invalidate
+		 * This fails on dirty or anything with private pages
+		 */
+		if (invalidate_inode_page(p))
+			ret = MF_RECOVERED;
+		else
+			pr_info("Memory failure: %#lx: Failed to invalidate\n",
+				pfn);
+	}
+
+	return ret;
+}
+
 /*
  * Error hit kernel page.
  * Do nothing, try to be lucky and not touch this instead. For a few cases we
@@ -579,8 +611,6 @@ static int me_unknown(struct page *p, unsigned long pfn)
  */
 static int me_pagecache_clean(struct page *p, unsigned long pfn)
 {
-	int err;
-	int ret = MF_FAILED;
 	struct address_space *mapping;
 
 	delete_from_lru_cache(p);
@@ -612,30 +642,7 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 	 *
 	 * Open: to take i_mutex or not for this? Right now we don't.
 	 */
-	if (mapping->a_ops->error_remove_page) {
-		err = mapping->a_ops->error_remove_page(mapping, p);
-		if (err != 0) {
-			pr_info("Memory failure: %#lx: Failed to punch page: %d\n",
-				pfn, err);
-		} else if (page_has_private(p) &&
-				!try_to_release_page(p, GFP_NOIO)) {
-			pr_info("Memory failure: %#lx: failed to release buffers\n",
-				pfn);
-		} else {
-			ret = MF_RECOVERED;
-		}
-	} else {
-		/*
-		 * If the file system doesn't support it just invalidate
-		 * This fails on dirty or anything with private pages
-		 */
-		if (invalidate_inode_page(p))
-			ret = MF_RECOVERED;
-		else
-			pr_info("Memory failure: %#lx: Failed to invalidate\n",
-				pfn);
-	}
-	return ret;
+	return truncate_error_page(p, pfn, mapping);
 }
 
 /*
@@ -741,24 +748,29 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 {
 	int res = 0;
 	struct page *hpage = compound_head(p);
+	struct address_space *mapping;
 
 	if (!PageHuge(hpage))
 		return MF_DELAYED;
 
-	/*
-	 * We can safely recover from error on free or reserved (i.e.
-	 * not in-use) hugepage by dequeuing it from freelist.
-	 * To check whether a hugepage is in-use or not, we can't use
-	 * page->lru because it can be used in other hugepage operations,
-	 * such as __unmap_hugepage_range() and gather_surplus_pages().
-	 * So instead we use page_mapping() and PageAnon().
-	 */
-	if (!(page_mapping(hpage) || PageAnon(hpage))) {
-		res = dequeue_hwpoisoned_huge_page(hpage);
-		if (!res)
-			return MF_RECOVERED;
+	mapping = page_mapping(hpage);
+	if (mapping) {
+		res = truncate_error_page(hpage, pfn, mapping);
+	} else {
+		unlock_page(hpage);
+		/*
+		 * migration entry prevents later access on error anonymous
+		 * hugepage, so we can free and dissolve it into buddy to
+		 * save healthy subpages.
+		 */
+		if (PageAnon(hpage))
+			put_page(hpage);
+		dissolve_free_huge_page(p);
+		res = MF_RECOVERED;
+		lock_page(hpage);
 	}
-	return MF_DELAYED;
+
+	return res;
 }
 
 /*
@@ -857,7 +869,7 @@ static int page_action(struct page_state *ps, struct page *p,
 	count = page_count(p) - 1;
 	if (ps->action == me_swapcache_dirty && result == MF_DELAYED)
 		count--;
-	if (count != 0) {
+	if (count > 0) {
 		pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
 		       pfn, action_page_types[ps->type], count);
 		result = MF_FAILED;
@@ -1010,20 +1022,84 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	return unmap_success;
 }
 
-static void set_page_hwpoison_huge_page(struct page *hpage)
+static int identify_page_state(unsigned long pfn, struct page *p,
+				unsigned long page_flags)
 {
-	int i;
-	int nr_pages = 1 << compound_order(hpage);
-	for (i = 0; i < nr_pages; i++)
-		SetPageHWPoison(hpage + i);
+	struct page_state *ps;
+
+	/*
+	 * The first check uses the current page flags which may not have any
+	 * relevant information. The second check with the saved page flags is
+	 * carried out only if the first check can't determine the page status.
+	 */
+	for (ps = error_states;; ps++)
+		if ((p->flags & ps->mask) == ps->res)
+			break;
+
+	page_flags |= (p->flags & (1UL << PG_dirty));
+
+	if (!ps->mask)
+		for (ps = error_states;; ps++)
+			if ((page_flags & ps->mask) == ps->res)
+				break;
+	return page_action(ps, p, pfn);
 }
 
-static void clear_page_hwpoison_huge_page(struct page *hpage)
+static int memory_failure_hugetlb(unsigned long pfn, int trapno, int flags)
 {
-	int i;
-	int nr_pages = 1 << compound_order(hpage);
-	for (i = 0; i < nr_pages; i++)
-		ClearPageHWPoison(hpage + i);
+	struct page *p = pfn_to_page(pfn);
+	struct page *head = compound_head(p);
+	int res;
+	unsigned long page_flags;
+
+	if (TestSetPageHWPoison(head)) {
+		pr_err("Memory failure: %#lx: already hardware poisoned\n",
+		       pfn);
+		return 0;
+	}
+
+	num_poisoned_pages_inc();
+
+	if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p)) {
+		/*
+		 * Check "filter hit" and "race with other subpage."
+		 */
+		lock_page(head);
+		if (PageHWPoison(head)) {
+			if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
+			    || (p != head && TestSetPageHWPoison(head))) {
+				num_poisoned_pages_dec();
+				unlock_page(head);
+				return 0;
+			}
+		}
+		unlock_page(head);
+		dissolve_free_huge_page(p);
+		action_result(pfn, MF_MSG_FREE_HUGE, MF_DELAYED);
+		return 0;
+	}
+
+	lock_page(head);
+	page_flags = head->flags;
+
+	if (!PageHWPoison(head)) {
+		pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
+		num_poisoned_pages_dec();
+		unlock_page(head);
+		put_hwpoison_page(head);
+		return 0;
+	}
+
+	if (!hwpoison_user_mappings(p, pfn, trapno, flags, &head)) {
+		action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
+		res = -EBUSY;
+		goto out;
+	}
+
+	res = identify_page_state(pfn, p, page_flags);
+out:
+	unlock_page(head);
+	return res;
 }
 
 /**
@@ -1046,12 +1122,10 @@ static void clear_page_hwpoison_huge_page(struct page *hpage)
  */
 int memory_failure(unsigned long pfn, int trapno, int flags)
 {
-	struct page_state *ps;
 	struct page *p;
 	struct page *hpage;
 	struct page *orig_head;
 	int res;
-	unsigned int nr_pages;
 	unsigned long page_flags;
 
 	if (!sysctl_memory_failure_recovery)
@@ -1064,34 +1138,22 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	}
 
 	p = pfn_to_page(pfn);
-	orig_head = hpage = compound_head(p);
+	if (PageHuge(p))
+		return memory_failure_hugetlb(pfn, trapno, flags);
 	if (TestSetPageHWPoison(p)) {
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 			pfn);
 		return 0;
 	}
 
-	/*
-	 * Currently errors on hugetlbfs pages are measured in hugepage units,
-	 * so nr_pages should be 1 << compound_order.  OTOH when errors are on
-	 * transparent hugepages, they are supposed to be split and error
-	 * measurement is done in normal page units.  So nr_pages should be one
-	 * in this case.
-	 */
-	if (PageHuge(p))
-		nr_pages = 1 << compound_order(hpage);
-	else /* normal page or thp */
-		nr_pages = 1;
-	num_poisoned_pages_add(nr_pages);
+	orig_head = hpage = compound_head(p);
+	num_poisoned_pages_inc();
 
 	/*
 	 * We need/can do nothing about count=0 pages.
 	 * 1) it's a free page, and therefore in safe hand:
 	 *    prep_new_page() will be the gate keeper.
-	 * 2) it's a free hugepage, which is also safe:
-	 *    an affected hugepage will be dequeued from hugepage freelist,
-	 *    so there's no concern about reusing it ever after.
-	 * 3) it's part of a non-compound high order page.
+	 * 2) it's part of a non-compound high order page.
 	 *    Implies some kernel user: cannot stop them from
 	 *    R/W the page; let's pray that the page has been
 	 *    used and will be freed some time later.
@@ -1102,32 +1164,13 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 		if (is_free_buddy_page(p)) {
 			action_result(pfn, MF_MSG_BUDDY, MF_DELAYED);
 			return 0;
-		} else if (PageHuge(hpage)) {
-			/*
-			 * Check "filter hit" and "race with other subpage."
-			 */
-			lock_page(hpage);
-			if (PageHWPoison(hpage)) {
-				if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
-				    || (p != hpage && TestSetPageHWPoison(hpage))) {
-					num_poisoned_pages_sub(nr_pages);
-					unlock_page(hpage);
-					return 0;
-				}
-			}
-			set_page_hwpoison_huge_page(hpage);
-			res = dequeue_hwpoisoned_huge_page(hpage);
-			action_result(pfn, MF_MSG_FREE_HUGE,
-				      res ? MF_IGNORED : MF_DELAYED);
-			unlock_page(hpage);
-			return res;
 		} else {
 			action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
 			return -EBUSY;
 		}
 	}
 
-	if (!PageHuge(p) && PageTransHuge(hpage)) {
+	if (PageTransHuge(hpage)) {
 		lock_page(p);
 		if (!PageAnon(p) || unlikely(split_huge_page(p))) {
 			unlock_page(p);
@@ -1138,7 +1181,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 				pr_err("Memory failure: %#lx: thp split failed\n",
 					pfn);
 			if (TestClearPageHWPoison(p))
-				num_poisoned_pages_sub(nr_pages);
+				num_poisoned_pages_dec();
 			put_hwpoison_page(p);
 			return -EBUSY;
 		}
@@ -1165,7 +1208,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 		return 0;
 	}
 
-	lock_page(hpage);
+	lock_page(p);
 
 	/*
 	 * The page could have changed compound pages during the locking.
@@ -1194,42 +1237,23 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	 */
 	if (!PageHWPoison(p)) {
 		pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
-		num_poisoned_pages_sub(nr_pages);
-		unlock_page(hpage);
-		put_hwpoison_page(hpage);
+		num_poisoned_pages_dec();
+		unlock_page(p);
+		put_hwpoison_page(p);
 		return 0;
 	}
 	if (hwpoison_filter(p)) {
 		if (TestClearPageHWPoison(p))
-			num_poisoned_pages_sub(nr_pages);
-		unlock_page(hpage);
-		put_hwpoison_page(hpage);
+			num_poisoned_pages_dec();
+		unlock_page(p);
+		put_hwpoison_page(p);
 		return 0;
 	}
 
-	if (!PageHuge(p) && !PageTransTail(p) && !PageLRU(p))
+	if (!PageTransTail(p) && !PageLRU(p))
 		goto identify_page_state;
 
 	/*
-	 * For error on the tail page, we should set PG_hwpoison
-	 * on the head page to show that the hugepage is hwpoisoned
-	 */
-	if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) {
-		action_result(pfn, MF_MSG_POISONED_HUGE, MF_IGNORED);
-		unlock_page(hpage);
-		put_hwpoison_page(hpage);
-		return 0;
-	}
-	/*
-	 * Set PG_hwpoison on all pages in an error hugepage,
-	 * because containment is done in hugepage unit for now.
-	 * Since we have done TestSetPageHWPoison() for the head page with
-	 * page lock held, we can safely set PG_hwpoison bits on tail pages.
-	 */
-	if (PageHuge(p))
-		set_page_hwpoison_huge_page(hpage);
-
-	/*
 	 * It's very difficult to mess with pages currently under IO
 	 * and in many cases impossible, so we just avoid it here.
 	 */
@@ -1258,25 +1282,9 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	}
 
 identify_page_state:
-	res = -EBUSY;
-	/*
-	 * The first check uses the current page flags which may not have any
-	 * relevant information. The second check with the saved page flagss is
-	 * carried out only if the first check can't determine the page status.
-	 */
-	for (ps = error_states;; ps++)
-		if ((p->flags & ps->mask) == ps->res)
-			break;
-
-	page_flags |= (p->flags & (1UL << PG_dirty));
-
-	if (!ps->mask)
-		for (ps = error_states;; ps++)
-			if ((page_flags & ps->mask) == ps->res)
-				break;
-	res = page_action(ps, p, pfn);
+	res = identify_page_state(pfn, p, page_flags);
 out:
-	unlock_page(hpage);
+	unlock_page(p);
 	return res;
 }
 EXPORT_SYMBOL_GPL(memory_failure);
@@ -1398,7 +1406,6 @@ int unpoison_memory(unsigned long pfn)
 	struct page *page;
 	struct page *p;
 	int freeit = 0;
-	unsigned int nr_pages;
 	static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
 					DEFAULT_RATELIMIT_BURST);
 
@@ -1443,20 +1450,7 @@ int unpoison_memory(unsigned long pfn)
 		return 0;
 	}
 
-	nr_pages = 1 << compound_order(page);
-
 	if (!get_hwpoison_page(p)) {
-		/*
-		 * Since HWPoisoned hugepage should have non-zero refcount,
-		 * race between memory failure and unpoison seems to happen.
-		 * In such case unpoison fails and memory failure runs
-		 * to the end.
-		 */
-		if (PageHuge(page)) {
-			unpoison_pr_info("Unpoison: Memory failure is now running on free hugepage %#lx\n",
-					 pfn, &unpoison_rs);
-			return 0;
-		}
 		if (TestClearPageHWPoison(p))
 			num_poisoned_pages_dec();
 		unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
@@ -1474,10 +1468,8 @@ int unpoison_memory(unsigned long pfn)
 	if (TestClearPageHWPoison(page)) {
 		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
 				 pfn, &unpoison_rs);
-		num_poisoned_pages_sub(nr_pages);
+		num_poisoned_pages_dec();
 		freeit = 1;
-		if (PageHuge(page))
-			clear_page_hwpoison_huge_page(page);
 	}
 	unlock_page(page);
 
@@ -1492,16 +1484,8 @@ EXPORT_SYMBOL(unpoison_memory);
 static struct page *new_page(struct page *p, unsigned long private, int **x)
 {
 	int nid = page_to_nid(p);
-	if (PageHuge(p)) {
-		struct hstate *hstate = page_hstate(compound_head(p));
 
-		if (hstate_is_gigantic(hstate))
-			return alloc_huge_page_node(hstate, NUMA_NO_NODE);
-
-		return alloc_huge_page_node(hstate, nid);
-	} else {
-		return __alloc_pages_node(nid, GFP_HIGHUSER_MOVABLE, 0);
-	}
+	return new_page_nodemask(p, nid, &node_states[N_MEMORY]);
 }
 
 /*
@@ -1608,15 +1592,8 @@ static int soft_offline_huge_page(struct page *page, int flags)
 		if (ret > 0)
 			ret = -EIO;
 	} else {
-		/* overcommit hugetlb page will be freed to buddy */
-		if (PageHuge(page)) {
-			set_page_hwpoison_huge_page(hpage);
-			dequeue_hwpoisoned_huge_page(hpage);
-			num_poisoned_pages_add(1 << compound_order(hpage));
-		} else {
-			SetPageHWPoison(page);
-			num_poisoned_pages_inc();
-		}
+		if (PageHuge(page))
+			dissolve_free_huge_page(page);
 	}
 	return ret;
 }
@@ -1732,15 +1709,12 @@ static int soft_offline_in_use_page(struct page *page, int flags)
 
 static void soft_offline_free_page(struct page *page)
 {
-	if (PageHuge(page)) {
-		struct page *hpage = compound_head(page);
+	struct page *head = compound_head(page);
 
-		set_page_hwpoison_huge_page(hpage);
-		if (!dequeue_hwpoisoned_huge_page(hpage))
-			num_poisoned_pages_add(1 << compound_order(hpage));
-	} else {
-		if (!TestSetPageHWPoison(page))
-			num_poisoned_pages_inc();
+	if (!TestSetPageHWPoison(head)) {
+		num_poisoned_pages_inc();
+		if (PageHuge(head))
+			dissolve_free_huge_page(page);
 	}
 }
 

diff --git a/mm/memory.c b/mm/memory.c
index e31dd97..0e517be 100644
--- a/mm/memory.c
+++ b/mm/memory.c

@@ -3262,14 +3262,14 @@ static int fault_around_bytes_set(void *data, u64 val)
 		fault_around_bytes = PAGE_SIZE; /* rounddown_pow_of_two(0) is undefined */
 	return 0;
 }
-DEFINE_SIMPLE_ATTRIBUTE(fault_around_bytes_fops,
+DEFINE_DEBUGFS_ATTRIBUTE(fault_around_bytes_fops,
 		fault_around_bytes_get, fault_around_bytes_set, "%llu\n");
 
 static int __init fault_around_debugfs(void)
 {
 	void *ret;
 
-	ret = debugfs_create_file("fault_around_bytes", 0644, NULL, NULL,
+	ret = debugfs_create_file_unsafe("fault_around_bytes", 0644, NULL, NULL,
 			&fault_around_bytes_fops);
 	if (!ret)
 		pr_warn("Failed to create fault_around_bytes in debugfs");
@@ -3591,7 +3591,7 @@ static int do_numa_page(struct vm_fault *vmf)
 	return 0;
 }
 
-static int create_huge_pmd(struct vm_fault *vmf)
+static inline int create_huge_pmd(struct vm_fault *vmf)
 {
 	if (vma_is_anonymous(vmf->vma))
 		return do_huge_pmd_anonymous_page(vmf);

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index f79aac7..8dccc31 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c

@@ -52,32 +52,17 @@ static void generic_online_page(struct page *page);
 static online_page_callback_t online_page_callback = generic_online_page;
 static DEFINE_MUTEX(online_page_callback_lock);
 
-/* The same as the cpu_hotplug lock, but for memory hotplug. */
-static struct {
-	struct task_struct *active_writer;
-	struct mutex lock; /* Synchronizes accesses to refcount, */
-	/*
-	 * Also blocks the new readers during
-	 * an ongoing mem hotplug operation.
-	 */
-	int refcount;
+DEFINE_STATIC_PERCPU_RWSEM(mem_hotplug_lock);
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map dep_map;
-#endif
-} mem_hotplug = {
-	.active_writer = NULL,
-	.lock = __MUTEX_INITIALIZER(mem_hotplug.lock),
-	.refcount = 0,
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	.dep_map = {.name = "mem_hotplug.lock" },
-#endif
-};
+void get_online_mems(void)
+{
+	percpu_down_read(&mem_hotplug_lock);
+}
 
-/* Lockdep annotations for get/put_online_mems() and mem_hotplug_begin/end() */
-#define memhp_lock_acquire_read() lock_map_acquire_read(&mem_hotplug.dep_map)
-#define memhp_lock_acquire()      lock_map_acquire(&mem_hotplug.dep_map)
-#define memhp_lock_release()      lock_map_release(&mem_hotplug.dep_map)
+void put_online_mems(void)
+{
+	percpu_up_read(&mem_hotplug_lock);
+}
 
 bool movable_node_enabled = false;
 
@@ -99,60 +84,16 @@ static int __init setup_memhp_default_state(char *str)
 }
 __setup("memhp_default_state=", setup_memhp_default_state);
 
-void get_online_mems(void)
-{
-	might_sleep();
-	if (mem_hotplug.active_writer == current)
-		return;
-	memhp_lock_acquire_read();
-	mutex_lock(&mem_hotplug.lock);
-	mem_hotplug.refcount++;
-	mutex_unlock(&mem_hotplug.lock);
-
-}
-
-void put_online_mems(void)
-{
-	if (mem_hotplug.active_writer == current)
-		return;
-	mutex_lock(&mem_hotplug.lock);
-
-	if (WARN_ON(!mem_hotplug.refcount))
-		mem_hotplug.refcount++; /* try to fix things up */
-
-	if (!--mem_hotplug.refcount && unlikely(mem_hotplug.active_writer))
-		wake_up_process(mem_hotplug.active_writer);
-	mutex_unlock(&mem_hotplug.lock);
-	memhp_lock_release();
-
-}
-
-/* Serializes write accesses to mem_hotplug.active_writer. */
-static DEFINE_MUTEX(memory_add_remove_lock);
-
 void mem_hotplug_begin(void)
 {
-	mutex_lock(&memory_add_remove_lock);
-
-	mem_hotplug.active_writer = current;
-
-	memhp_lock_acquire();
-	for (;;) {
-		mutex_lock(&mem_hotplug.lock);
-		if (likely(!mem_hotplug.refcount))
-			break;
-		__set_current_state(TASK_UNINTERRUPTIBLE);
-		mutex_unlock(&mem_hotplug.lock);
-		schedule();
-	}
+	cpus_read_lock();
+	percpu_down_write(&mem_hotplug_lock);
 }
 
 void mem_hotplug_done(void)
 {
-	mem_hotplug.active_writer = NULL;
-	mutex_unlock(&mem_hotplug.lock);
-	memhp_lock_release();
-	mutex_unlock(&memory_add_remove_lock);
+	percpu_up_write(&mem_hotplug_lock);
+	cpus_read_unlock();
 }
 
 /* add this memory to iomem resource */
@@ -580,11 +521,8 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn)
 {
 	struct pglist_data *pgdat = zone->zone_pgdat;
 	int nr_pages = PAGES_PER_SECTION;
-	int zone_type;
 	unsigned long flags;
 
-	zone_type = zone - pgdat->node_zones;
-
 	pgdat_resize_lock(zone->zone_pgdat, &flags);
 	shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
 	shrink_pgdat_span(pgdat, start_pfn, start_pfn + nr_pages);
@@ -934,6 +872,19 @@ struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
 	return &pgdat->node_zones[ZONE_NORMAL];
 }
 
+static inline bool movable_pfn_range(int nid, struct zone *default_zone,
+		unsigned long start_pfn, unsigned long nr_pages)
+{
+	if (!allow_online_pfn_range(nid, start_pfn, nr_pages,
+				MMOP_ONLINE_KERNEL))
+		return true;
+
+	if (!movable_node_is_enabled())
+		return false;
+
+	return !zone_intersects(default_zone, start_pfn, nr_pages);
+}
+
 /*
  * Associates the given pfn range with the given node and the zone appropriate
  * for the given online type.
@@ -949,10 +900,10 @@ static struct zone * __meminit move_pfn_range(int online_type, int nid,
 		/*
 		 * MMOP_ONLINE_KEEP defaults to MMOP_ONLINE_KERNEL but use
 		 * movable zone if that is not possible (e.g. we are within
-		 * or past the existing movable zone)
+		 * or past the existing movable zone). movable_node overrides
+		 * this default and defaults to movable zone
 		 */
-		if (!allow_online_pfn_range(nid, start_pfn, nr_pages,
-					MMOP_ONLINE_KERNEL))
+		if (movable_pfn_range(nid, zone, start_pfn, nr_pages))
 			zone = movable_zone;
 	} else if (online_type == MMOP_ONLINE_MOVABLE) {
 		zone = &pgdat->node_zones[ZONE_MOVABLE];
@@ -1268,7 +1219,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
 
 error:
 	/* rollback pgdat allocation and others */
-	if (new_pgdat)
+	if (new_pgdat && pgdat)
 		rollback_node_hotadd(nid, pgdat);
 	memblock_remove(start, size);
 
@@ -1420,32 +1371,19 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 static struct page *new_node_page(struct page *page, unsigned long private,
 		int **result)
 {
-	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
 	int nid = page_to_nid(page);
 	nodemask_t nmask = node_states[N_MEMORY];
-	struct page *new_page = NULL;
 
 	/*
-	 * TODO: allocate a destination hugepage from a nearest neighbor node,
-	 * accordance with memory policy of the user process if possible. For
-	 * now as a simple work-around, we use the next node for destination.
+	 * try to allocate from a different node but reuse this node if there
+	 * are no other online nodes to be used (e.g. we are offlining a part
+	 * of the only existing node)
 	 */
-	if (PageHuge(page))
-		return alloc_huge_page_node(page_hstate(compound_head(page)),
-					next_node_in(nid, nmask));
-
 	node_clear(nid, nmask);
+	if (nodes_empty(nmask))
+		node_set(nid, nmask);
 
-	if (PageHighMem(page)
-	    || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
-		gfp_mask |= __GFP_HIGHMEM;
-
-	if (!nodes_empty(nmask))
-		new_page = __alloc_pages_nodemask(gfp_mask, 0, nid, &nmask);
-	if (!new_page)
-		new_page = __alloc_pages(gfp_mask, 0, nid);
-
-	return new_page;
+	return new_page_nodemask(page, nid, &nmask);
 }
 
 #define NR_OFFLINE_AT_ONCE_PAGES	(256)
@@ -1728,7 +1666,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
 		goto failed_removal;
 	ret = 0;
 	if (drain) {
-		lru_add_drain_all();
+		lru_add_drain_all_cpuslocked();
 		cond_resched();
 		drain_all_pages(zone);
 	}
@@ -1749,7 +1687,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
 		}
 	}
 	/* drain all zone's lru pagevec, this is asynchronous... */
-	lru_add_drain_all();
+	lru_add_drain_all_cpuslocked();
 	yield();
 	/* drain pcp pages, this is synchronous. */
 	drain_all_pages(zone);

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 7d8e562..d911fa5 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c

@@ -1078,7 +1078,8 @@ static struct page *new_page(struct page *page, unsigned long start, int **x)
 	/*
 	 * if !vma, alloc_page_vma() will use task or system default policy
 	 */
-	return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+	return alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL,
+			vma, address);
 }
 #else
 

diff --git a/mm/migrate.c b/mm/migrate.c
index 051cc15..6276715 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c

@@ -1252,6 +1252,8 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 out:
 	if (rc != -EAGAIN)
 		putback_active_hugepage(hpage);
+	if (reason == MR_MEMORY_FAILURE && !test_set_page_hwpoison(hpage))
+		num_poisoned_pages_inc();
 
 	/*
 	 * If migration was not successful and there's a freeing callback, use
@@ -1914,7 +1916,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	int page_lru = page_is_file_cache(page);
 	unsigned long mmun_start = address & HPAGE_PMD_MASK;
 	unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
-	pmd_t orig_entry;
 
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
@@ -1957,8 +1958,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	/* Recheck the target PMD */
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	ptl = pmd_lock(mm, pmd);
-	if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) {
-fail_putback:
+	if (unlikely(!pmd_same(*pmd, entry) || !page_ref_freeze(page, 2))) {
 		spin_unlock(ptl);
 		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
@@ -1980,7 +1980,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		goto out_unlock;
 	}
 
-	orig_entry = *pmd;
 	entry = mk_huge_pmd(new_page, vma->vm_page_prot);
 	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
@@ -1997,15 +1996,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	set_pmd_at(mm, mmun_start, pmd, entry);
 	update_mmu_cache_pmd(vma, address, &entry);
 
-	if (page_count(page) != 2) {
-		set_pmd_at(mm, mmun_start, pmd, orig_entry);
-		flush_pmd_tlb_range(vma, mmun_start, mmun_end);
-		mmu_notifier_invalidate_range(mm, mmun_start, mmun_end);
-		update_mmu_cache_pmd(vma, address, &entry);
-		page_remove_rmap(new_page, true);
-		goto fail_putback;
-	}
-
+	page_ref_unfreeze(page, 2);
 	mlock_migrate_page(new_page, page);
 	page_remove_rmap(page, true);
 	set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED);

diff --git a/mm/mmap.c b/mm/mmap.c
index 7f8cfe9..7fa6759 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c

@@ -2177,7 +2177,6 @@ static int acct_stack_growth(struct vm_area_struct *vma,
 			     unsigned long size, unsigned long grow)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	struct rlimit *rlim = current->signal->rlim;
 	unsigned long new_start;
 
 	/* address space limit tests */
@@ -2185,7 +2184,7 @@ static int acct_stack_growth(struct vm_area_struct *vma,
 		return -ENOMEM;
 
 	/* Stack limit test */
-	if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+	if (size > rlimit(RLIMIT_STACK))
 		return -ENOMEM;
 
 	/* mlock limit tests */
@@ -2193,7 +2192,7 @@ static int acct_stack_growth(struct vm_area_struct *vma,
 		unsigned long locked;
 		unsigned long limit;
 		locked = mm->locked_vm + grow;
-		limit = READ_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
+		limit = rlimit(RLIMIT_MEMLOCK);
 		limit >>= PAGE_SHIFT;
 		if (locked > limit && !capable(CAP_IPC_LOCK))
 			return -ENOMEM;
@@ -2244,7 +2243,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 		gap_addr = TASK_SIZE;
 
 	next = vma->vm_next;
-	if (next && next->vm_start < gap_addr) {
+	if (next && next->vm_start < gap_addr &&
+			(next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
 		if (!(next->vm_flags & VM_GROWSUP))
 			return -ENOMEM;
 		/* Check that both stack segments have the same anon_vma? */
@@ -2315,7 +2315,6 @@ int expand_downwards(struct vm_area_struct *vma,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *prev;
-	unsigned long gap_addr;
 	int error;
 
 	address &= PAGE_MASK;
@@ -2324,14 +2323,12 @@ int expand_downwards(struct vm_area_struct *vma,
 		return error;
 
 	/* Enforce stack_guard_gap */
-	gap_addr = address - stack_guard_gap;
-	if (gap_addr > address)
-		return -ENOMEM;
 	prev = vma->vm_prev;
-	if (prev && prev->vm_end > gap_addr) {
-		if (!(prev->vm_flags & VM_GROWSDOWN))
+	/* Check that both stack segments have the same anon_vma? */
+	if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
+			(prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
+		if (address - prev->vm_end < stack_guard_gap)
 			return -ENOMEM;
-		/* Check that both stack segments have the same anon_vma? */
 	}
 
 	/* We must make sure the anon_vma is allocated. */

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 0e2c925..9e8b4f0 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c

@@ -490,6 +490,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		ret = false;
+		trace_skip_task_reaping(tsk->pid);
 		goto unlock_oom;
 	}
 
@@ -500,9 +501,12 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 	 */
 	if (!mmget_not_zero(mm)) {
 		up_read(&mm->mmap_sem);
+		trace_skip_task_reaping(tsk->pid);
 		goto unlock_oom;
 	}
 
+	trace_start_task_reaping(tsk->pid);
+
 	/*
 	 * Tell all users of get_user/copy_from_user etc... that the content
 	 * is no longer stable. No barriers really needed because unmapping
@@ -544,6 +548,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 	 * put the oom_reaper out of the way.
 	 */
 	mmput_async(mm);
+	trace_finish_task_reaping(tsk->pid);
 unlock_oom:
 	mutex_unlock(&oom_lock);
 	return ret;
@@ -615,6 +620,7 @@ static void wake_oom_reaper(struct task_struct *tsk)
 	tsk->oom_reaper_list = oom_reaper_list;
 	oom_reaper_list = tsk;
 	spin_unlock(&oom_reaper_lock);
+	trace_wake_reaper(tsk->pid);
 	wake_up(&oom_reaper_wait);
 }
 
@@ -666,6 +672,7 @@ static void mark_oom_victim(struct task_struct *tsk)
 	 */
 	__thaw_task(tsk);
 	atomic_inc(&oom_victims);
+	trace_mark_victim(tsk->pid);
 }
 
 /**

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0b60cc7..96e93b2 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c

@@ -601,7 +601,7 @@ static inline void __wb_writeout_inc(struct bdi_writeback *wb)
 {
 	struct wb_domain *cgdom;
 
-	__inc_wb_stat(wb, WB_WRITTEN);
+	inc_wb_stat(wb, WB_WRITTEN);
 	wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
 			       wb->bdi->max_prop_frac);
 
@@ -2435,8 +2435,8 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 		__inc_lruvec_page_state(page, NR_FILE_DIRTY);
 		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 		__inc_node_page_state(page, NR_DIRTIED);
-		__inc_wb_stat(wb, WB_RECLAIMABLE);
-		__inc_wb_stat(wb, WB_DIRTIED);
+		inc_wb_stat(wb, WB_RECLAIMABLE);
+		inc_wb_stat(wb, WB_DIRTIED);
 		task_io_account_write(PAGE_SIZE);
 		current->nr_dirtied++;
 		this_cpu_inc(bdp_ratelimits);
@@ -2741,7 +2741,7 @@ int test_clear_page_writeback(struct page *page)
 			if (bdi_cap_account_writeback(bdi)) {
 				struct bdi_writeback *wb = inode_to_wb(inode);
 
-				__dec_wb_stat(wb, WB_WRITEBACK);
+				dec_wb_stat(wb, WB_WRITEBACK);
 				__wb_writeout_inc(wb);
 			}
 		}
@@ -2786,7 +2786,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 						page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
 			if (bdi_cap_account_writeback(bdi))
-				__inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
+				inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
 
 			/*
 			 * We can come through here when swapping anonymous

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bd65b60..6d30e91 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c

@@ -2206,19 +2206,26 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
  * list of requested migratetype, possibly along with other pages from the same
  * block, depending on fragmentation avoidance heuristics. Returns true if
  * fallback was found so that __rmqueue_smallest() can grab it.
+ *
+ * The use of signed ints for order and current_order is a deliberate
+ * deviation from the rest of this file, to make the for loop
+ * condition simpler.
  */
 static inline bool
-__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
+__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 {
 	struct free_area *area;
-	unsigned int current_order;
+	int current_order;
 	struct page *page;
 	int fallback_mt;
 	bool can_steal;
 
-	/* Find the largest possible block of pages in the other list */
-	for (current_order = MAX_ORDER-1;
-				current_order >= order && current_order <= MAX_ORDER-1;
+	/*
+	 * Find the largest available free page in the other list. This roughly
+	 * approximates finding the pageblock with the most free pages, which
+	 * would be too costly to do exactly.
+	 */
+	for (current_order = MAX_ORDER - 1; current_order >= order;
 				--current_order) {
 		area = &(zone->free_area[current_order]);
 		fallback_mt = find_suitable_fallback(area, current_order,
@@ -2226,19 +2233,50 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
 		if (fallback_mt == -1)
 			continue;
 
-		page = list_first_entry(&area->free_list[fallback_mt],
-						struct page, lru);
+		/*
+		 * We cannot steal all free pages from the pageblock and the
+		 * requested migratetype is movable. In that case it's better to
+		 * steal and split the smallest available page instead of the
+		 * largest available page, because even if the next movable
+		 * allocation falls back into a different pageblock than this
+		 * one, it won't cause permanent fragmentation.
+		 */
+		if (!can_steal && start_migratetype == MIGRATE_MOVABLE
+					&& current_order > order)
+			goto find_smallest;
 
-		steal_suitable_fallback(zone, page, start_migratetype,
-								can_steal);
-
-		trace_mm_page_alloc_extfrag(page, order, current_order,
-			start_migratetype, fallback_mt);
-
-		return true;
+		goto do_steal;
 	}
 
 	return false;
+
+find_smallest:
+	for (current_order = order; current_order < MAX_ORDER;
+							current_order++) {
+		area = &(zone->free_area[current_order]);
+		fallback_mt = find_suitable_fallback(area, current_order,
+				start_migratetype, false, &can_steal);
+		if (fallback_mt != -1)
+			break;
+	}
+
+	/*
+	 * This should not happen - we already found a suitable fallback
+	 * when looking for the largest page.
+	 */
+	VM_BUG_ON(current_order == MAX_ORDER);
+
+do_steal:
+	page = list_first_entry(&area->free_list[fallback_mt],
+							struct page, lru);
+
+	steal_suitable_fallback(zone, page, start_migratetype, can_steal);
+
+	trace_mm_page_alloc_extfrag(page, order, current_order,
+		start_migratetype, fallback_mt);
+
+	return true;
+
 }
 
 /*
@@ -3246,6 +3284,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	/* The OOM killer will not help higher order allocs */
 	if (order > PAGE_ALLOC_COSTLY_ORDER)
 		goto out;
+	/*
+	 * We have already exhausted all our reclaim opportunities without any
+	 * success so it is time to admit defeat. We will skip the OOM killer
+	 * because it is very likely that the caller has a more reasonable
+	 * fallback than shooting a random task.
+	 */
+	if (gfp_mask & __GFP_RETRY_MAYFAIL)
+		goto out;
 	/* The OOM killer does not needlessly kill tasks for lowmem */
 	if (ac->high_zoneidx < ZONE_NORMAL)
 		goto out;
@@ -3375,7 +3421,7 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
 	}
 
 	/*
-	 * !costly requests are much more important than __GFP_REPEAT
+	 * !costly requests are much more important than __GFP_RETRY_MAYFAIL
 	 * costly ones because they are de facto nofail and invoke OOM
 	 * killer to move on while costly can fail and users are ready
 	 * to cope with that. 1/4 retries is rather arbitrary but we
@@ -3882,9 +3928,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 
 	/*
 	 * Do not retry costly high order allocations unless they are
-	 * __GFP_REPEAT
+	 * __GFP_RETRY_MAYFAIL
 	 */
-	if (costly_order && !(gfp_mask & __GFP_REPEAT))
+	if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
 		goto nopage;
 
 	if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
@@ -5240,7 +5286,7 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
 #endif
 		/* we have to stop all cpus to guarantee there is no user
 		   of zonelist */
-		stop_machine(__build_all_zonelists, pgdat, NULL);
+		stop_machine_cpuslocked(__build_all_zonelists, pgdat, NULL);
 		/* cpuset refresh routine should be here */
 	}
 	vm_total_pages = nr_free_pagecache_pages();

diff --git a/mm/page_io.c b/mm/page_io.c
index 2da71e6..b6c4ac38 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c

@@ -117,6 +117,7 @@ static void swap_slot_free_notify(struct page *page)
 static void end_swap_bio_read(struct bio *bio)
 {
 	struct page *page = bio->bi_io_vec[0].bv_page;
+	struct task_struct *waiter = bio->bi_private;
 
 	if (bio->bi_status) {
 		SetPageError(page);
@@ -132,7 +133,9 @@ static void end_swap_bio_read(struct bio *bio)
 	swap_slot_free_notify(page);
 out:
 	unlock_page(page);
+	WRITE_ONCE(bio->bi_private, NULL);
 	bio_put(bio);
+	wake_up_process(waiter);
 }
 
 int generic_swapfile_activate(struct swap_info_struct *sis,
@@ -329,11 +332,13 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 	return ret;
 }
 
-int swap_readpage(struct page *page)
+int swap_readpage(struct page *page, bool do_poll)
 {
 	struct bio *bio;
 	int ret = 0;
 	struct swap_info_struct *sis = page_swap_info(page);
+	blk_qc_t qc;
+	struct block_device *bdev;
 
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -372,9 +377,23 @@ int swap_readpage(struct page *page)
 		ret = -ENOMEM;
 		goto out;
 	}
+	bdev = bio->bi_bdev;
+	bio->bi_private = current;
 	bio_set_op_attrs(bio, REQ_OP_READ, 0);
 	count_vm_event(PSWPIN);
-	submit_bio(bio);
+	bio_get(bio);
+	qc = submit_bio(bio);
+	while (do_poll) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!READ_ONCE(bio->bi_private))
+			break;
+
+		if (!blk_mq_poll(bdev_get_queue(bdev), qc))
+			break;
+	}
+	__set_current_state(TASK_RUNNING);
+	bio_put(bio);
+
 out:
 	return ret;
 }

diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 3606104..757410d 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c

@@ -8,6 +8,7 @@
 #include <linux/memory.h>
 #include <linux/hugetlb.h>
 #include <linux/page_owner.h>
+#include <linux/migrate.h>
 #include "internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -294,20 +295,5 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
 struct page *alloc_migrate_target(struct page *page, unsigned long private,
 				  int **resultp)
 {
-	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
-
-	/*
-	 * TODO: allocate a destination hugepage from a nearest neighbor node,
-	 * accordance with memory policy of the user process if possible. For
-	 * now as a simple work-around, we use the next node for destination.
-	 */
-	if (PageHuge(page))
-		return alloc_huge_page_node(page_hstate(compound_head(page)),
-					    next_node_in(page_to_nid(page),
-							 node_online_map));
-
-	if (PageHighMem(page))
-		gfp_mask |= __GFP_HIGHMEM;
-
-	return alloc_page(gfp_mask);
+	return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]);
 }

diff --git a/mm/page_owner.c b/mm/page_owner.c
index 60634dc..0fd9dcf 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c

@@ -281,7 +281,11 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
 				continue;
 
 			if (PageBuddy(page)) {
-				pfn += (1UL << page_order(page)) - 1;
+				unsigned long freepage_order;
+
+				freepage_order = page_order_unsafe(page);
+				if (freepage_order < MAX_ORDER)
+					pfn += (1UL << freepage_order) - 1;
 				continue;
 			}
 

diff --git a/mm/shmem.c b/mm/shmem.c
index 9418f5a..b0aa607 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c

@@ -1977,10 +1977,12 @@ static int shmem_fault(struct vm_fault *vmf)
 	}
 
 	sgp = SGP_CACHE;
-	if (vma->vm_flags & VM_HUGEPAGE)
-		sgp = SGP_HUGE;
-	else if (vma->vm_flags & VM_NOHUGEPAGE)
+
+	if ((vma->vm_flags & VM_NOHUGEPAGE) ||
+	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
 		sgp = SGP_NOHUGE;
+	else if (vma->vm_flags & VM_HUGEPAGE)
+		sgp = SGP_HUGE;
 
 	error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
 				  gfp, vma, vmf, &ret);

diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index a56c398..c50b1a1 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c

@@ -56,11 +56,11 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
 
 		if (node_state(node, N_HIGH_MEMORY))
 			page = alloc_pages_node(
-				node, GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,
+				node, GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL,
 				get_order(size));
 		else
 			page = alloc_pages(
-				GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,
+				GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL,
 				get_order(size));
 		if (page)
 			return page_address(page);

diff --git a/mm/swap.c b/mm/swap.c
index 4f44dbd..60b1d2a 100644
--- a/mm/swap.c
+++ b/mm/swap.c

@@ -688,7 +688,7 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
 
 static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
 
-void lru_add_drain_all(void)
+void lru_add_drain_all_cpuslocked(void)
 {
 	static DEFINE_MUTEX(lock);
 	static struct cpumask has_work;
@@ -702,7 +702,6 @@ void lru_add_drain_all(void)
 		return;
 
 	mutex_lock(&lock);
-	get_online_cpus();
 	cpumask_clear(&has_work);
 
 	for_each_online_cpu(cpu) {
@@ -722,10 +721,16 @@ void lru_add_drain_all(void)
 	for_each_cpu(cpu, &has_work)
 		flush_work(&per_cpu(lru_add_drain_work, cpu));
 
-	put_online_cpus();
 	mutex_unlock(&lock);
 }
 
+void lru_add_drain_all(void)
+{
+	get_online_cpus();
+	lru_add_drain_all_cpuslocked();
+	put_online_cpus();
+}
+
 /**
  * release_pages - batched put_page()
  * @pages: array of pages to release

diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 90c1032..13a1740 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c

@@ -273,11 +273,11 @@ int free_swap_slot(swp_entry_t entry)
 {
 	struct swap_slots_cache *cache;
 
-	cache = &get_cpu_var(swp_slots);
+	cache = raw_cpu_ptr(&swp_slots);
 	if (use_swap_slot_cache && cache->slots_ret) {
 		spin_lock_irq(&cache->free_lock);
 		/* Swap slots cache may be deactivated before acquiring lock */
-		if (!use_swap_slot_cache) {
+		if (!use_swap_slot_cache || !cache->slots_ret) {
 			spin_unlock_irq(&cache->free_lock);
 			goto direct_free;
 		}
@@ -297,7 +297,6 @@ int free_swap_slot(swp_entry_t entry)
 direct_free:
 		swapcache_free_entries(&entry, 1);
 	}
-	put_cpu_var(swp_slots);
 
 	return 0;
 }

diff --git a/mm/swap_state.c b/mm/swap_state.c
index 9c71b6b..b68c930 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c

@@ -412,14 +412,14 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
  * the swap entry is no longer in use.
  */
 struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
-			struct vm_area_struct *vma, unsigned long addr)
+		struct vm_area_struct *vma, unsigned long addr, bool do_poll)
 {
 	bool page_was_allocated;
 	struct page *retpage = __read_swap_cache_async(entry, gfp_mask,
 			vma, addr, &page_was_allocated);
 
 	if (page_was_allocated)
-		swap_readpage(retpage);
+		swap_readpage(retpage, do_poll);
 
 	return retpage;
 }
@@ -496,11 +496,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	unsigned long start_offset, end_offset;
 	unsigned long mask;
 	struct blk_plug plug;
+	bool do_poll = true;
 
 	mask = swapin_nr_pages(offset) - 1;
 	if (!mask)
 		goto skip;
 
+	do_poll = false;
 	/* Read a page_cluster sized and aligned cluster around offset. */
 	start_offset = offset & ~mask;
 	end_offset = offset | mask;
@@ -511,7 +513,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	for (offset = start_offset; offset <= end_offset ; offset++) {
 		/* Ok, do the async read-ahead now */
 		page = read_swap_cache_async(swp_entry(swp_type(entry), offset),
-						gfp_mask, vma, addr);
+						gfp_mask, vma, addr, false);
 		if (!page)
 			continue;
 		if (offset != entry_offset && likely(!PageTransCompound(page)))
@@ -522,7 +524,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 
 	lru_add_drain();	/* Push any new pages onto the LRU now */
 skip:
-	return read_swap_cache_async(entry, gfp_mask, vma, addr);
+	return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
 }
 
 int init_swap_address_space(unsigned int type, unsigned long nr_pages)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index 811d90e..6ba4aab 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c

@@ -1868,7 +1868,7 @@ int try_to_unuse(unsigned int type, bool frontswap,
 		swap_map = &si->swap_map[i];
 		entry = swp_entry(type, i);
 		page = read_swap_cache_async(entry,
-					GFP_HIGHUSER_MOVABLE, NULL, 0);
+					GFP_HIGHUSER_MOVABLE, NULL, 0, false);
 		if (!page) {
 			/*
 			 * Either swap_duplicate() failed because entry

diff --git a/mm/truncate.c b/mm/truncate.c
index 6479ed2..2330223 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c

@@ -530,9 +530,15 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 			} else if (PageTransHuge(page)) {
 				index += HPAGE_PMD_NR - 1;
 				i += HPAGE_PMD_NR - 1;
-				/* 'end' is in the middle of THP */
-				if (index ==  round_down(end, HPAGE_PMD_NR))
+				/*
+				 * 'end' is in the middle of THP. Don't
+				 * invalidate the page as the part outside of
+				 * 'end' could be still useful.
+				 */
+				if (index > end) {
+					unlock_page(page);
 					continue;
+				}
 			}
 
 			ret = invalidate_inode_page(page);

diff --git a/mm/util.c b/mm/util.c
index 26be640..ee250e2 100644
--- a/mm/util.c
+++ b/mm/util.c

@@ -339,9 +339,9 @@ EXPORT_SYMBOL(vm_mmap);
  * Uses kmalloc to get the memory but if the allocation fails then falls back
  * to the vmalloc allocator. Use kvfree for freeing the memory.
  *
- * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported. __GFP_REPEAT
- * is supported only for large (>32kB) allocations, and it should be used only if
- * kmalloc is preferable to the vmalloc fallback, due to visible performance drawbacks.
+ * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported.
+ * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is
+ * preferable to the vmalloc fallback, due to visible performance drawbacks.
  *
  * Any use of gfp flags outside of GFP_KERNEL should be consulted with mm people.
  */
@@ -366,13 +366,7 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
 	if (size > PAGE_SIZE) {
 		kmalloc_flags |= __GFP_NOWARN;
 
-		/*
-		 * We have to override __GFP_REPEAT by __GFP_NORETRY for !costly
-		 * requests because there is no other way to tell the allocator
-		 * that we want to fail rather than retry endlessly.
-		 */
-		if (!(kmalloc_flags & __GFP_REPEAT) ||
-				(size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
+		if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL))
 			kmalloc_flags |= __GFP_NORETRY;
 	}
 

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 6211a80..8698c1c 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c

@@ -325,6 +325,7 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 
 /*** Global kva allocator ***/
 
+#define VM_LAZY_FREE	0x02
 #define VM_VM_AREA	0x04
 
 static DEFINE_SPINLOCK(vmap_area_lock);
@@ -1497,6 +1498,7 @@ struct vm_struct *remove_vm_area(const void *addr)
 		spin_lock(&vmap_area_lock);
 		va->vm = NULL;
 		va->flags &= ~VM_VM_AREA;
+		va->flags |= VM_LAZY_FREE;
 		spin_unlock(&vmap_area_lock);
 
 		vmap_debug_free_range(va->va_start, va->va_end);
@@ -1793,7 +1795,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
  *	allocator with @gfp_mask flags.  Map them into contiguous
  *	kernel virtual space, using a pagetable protection of @prot.
  *
- *	Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_REPEAT
+ *	Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL
  *	and __GFP_NOFAIL are not supported
  *
  *	Any use of gfp flags outside of GFP_KERNEL should be consulted
@@ -2704,8 +2706,14 @@ static int s_show(struct seq_file *m, void *p)
 	 * s_show can encounter race with remove_vm_area, !VM_VM_AREA on
 	 * behalf of vmap area is being tear down or vm_map_ram allocation.
 	 */
-	if (!(va->flags & VM_VM_AREA))
+	if (!(va->flags & VM_VM_AREA)) {
+		seq_printf(m, "0x%pK-0x%pK %7ld %s\n",
+			(void *)va->va_start, (void *)va->va_end,
+			va->va_end - va->va_start,
+			va->flags & VM_LAZY_FREE ? "unpurged vm_area" : "vm_map_ram");
+
 		return 0;
+	}
 
 	v = va->vm;
 

diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index ce0618b..85350ce 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c

@@ -93,12 +93,25 @@ enum vmpressure_levels {
 	VMPRESSURE_NUM_LEVELS,
 };
 
+enum vmpressure_modes {
+	VMPRESSURE_NO_PASSTHROUGH = 0,
+	VMPRESSURE_HIERARCHY,
+	VMPRESSURE_LOCAL,
+	VMPRESSURE_NUM_MODES,
+};
+
 static const char * const vmpressure_str_levels[] = {
 	[VMPRESSURE_LOW] = "low",
 	[VMPRESSURE_MEDIUM] = "medium",
 	[VMPRESSURE_CRITICAL] = "critical",
 };
 
+static const char * const vmpressure_str_modes[] = {
+	[VMPRESSURE_NO_PASSTHROUGH] = "default",
+	[VMPRESSURE_HIERARCHY] = "hierarchy",
+	[VMPRESSURE_LOCAL] = "local",
+};
+
 static enum vmpressure_levels vmpressure_level(unsigned long pressure)
 {
 	if (pressure >= vmpressure_level_critical)
@@ -141,27 +154,31 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
 struct vmpressure_event {
 	struct eventfd_ctx *efd;
 	enum vmpressure_levels level;
+	enum vmpressure_modes mode;
 	struct list_head node;
 };
 
 static bool vmpressure_event(struct vmpressure *vmpr,
-			     enum vmpressure_levels level)
+			     const enum vmpressure_levels level,
+			     bool ancestor, bool signalled)
 {
 	struct vmpressure_event *ev;
-	bool signalled = false;
+	bool ret = false;
 
 	mutex_lock(&vmpr->events_lock);
-
 	list_for_each_entry(ev, &vmpr->events, node) {
-		if (level >= ev->level) {
-			eventfd_signal(ev->efd, 1);
-			signalled = true;
-		}
+		if (ancestor && ev->mode == VMPRESSURE_LOCAL)
+			continue;
+		if (signalled && ev->mode == VMPRESSURE_NO_PASSTHROUGH)
+			continue;
+		if (level < ev->level)
+			continue;
+		eventfd_signal(ev->efd, 1);
+		ret = true;
 	}
-
 	mutex_unlock(&vmpr->events_lock);
 
-	return signalled;
+	return ret;
 }
 
 static void vmpressure_work_fn(struct work_struct *work)
@@ -170,6 +187,8 @@ static void vmpressure_work_fn(struct work_struct *work)
 	unsigned long scanned;
 	unsigned long reclaimed;
 	enum vmpressure_levels level;
+	bool ancestor = false;
+	bool signalled = false;
 
 	spin_lock(&vmpr->sr_lock);
 	/*
@@ -194,12 +213,9 @@ static void vmpressure_work_fn(struct work_struct *work)
 	level = vmpressure_calc_level(scanned, reclaimed);
 
 	do {
-		if (vmpressure_event(vmpr, level))
-			break;
-		/*
-		 * If not handled, propagate the event upward into the
-		 * hierarchy.
-		 */
+		if (vmpressure_event(vmpr, level, ancestor, signalled))
+			signalled = true;
+		ancestor = true;
 	} while ((vmpr = vmpressure_parent(vmpr)));
 }
 
@@ -326,17 +342,40 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
 	vmpressure(gfp, memcg, true, vmpressure_win, 0);
 }
 
+static enum vmpressure_levels str_to_level(const char *arg)
+{
+	enum vmpressure_levels level;
+
+	for (level = 0; level < VMPRESSURE_NUM_LEVELS; level++)
+		if (!strcmp(vmpressure_str_levels[level], arg))
+			return level;
+	return -1;
+}
+
+static enum vmpressure_modes str_to_mode(const char *arg)
+{
+	enum vmpressure_modes mode;
+
+	for (mode = 0; mode < VMPRESSURE_NUM_MODES; mode++)
+		if (!strcmp(vmpressure_str_modes[mode], arg))
+			return mode;
+	return -1;
+}
+
+#define MAX_VMPRESSURE_ARGS_LEN	(strlen("critical") + strlen("hierarchy") + 2)
+
 /**
  * vmpressure_register_event() - Bind vmpressure notifications to an eventfd
  * @memcg:	memcg that is interested in vmpressure notifications
  * @eventfd:	eventfd context to link notifications with
- * @args:	event arguments (used to set up a pressure level threshold)
+ * @args:	event arguments (pressure level threshold, optional mode)
  *
  * This function associates eventfd context with the vmpressure
  * infrastructure, so that the notifications will be delivered to the
- * @eventfd. The @args parameter is a string that denotes pressure level
- * threshold (one of vmpressure_str_levels, i.e. "low", "medium", or
- * "critical").
+ * @eventfd. The @args parameter is a comma-delimited string that denotes a
+ * pressure level threshold (one of vmpressure_str_levels, i.e. "low", "medium",
+ * or "critical") and an optional mode (one of vmpressure_str_modes, i.e.
+ * "hierarchy" or "local").
  *
  * To be used as memcg event method.
  */
@@ -345,28 +384,53 @@ int vmpressure_register_event(struct mem_cgroup *memcg,
 {
 	struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
 	struct vmpressure_event *ev;
-	int level;
+	enum vmpressure_modes mode = VMPRESSURE_NO_PASSTHROUGH;
+	enum vmpressure_levels level = -1;
+	char *spec, *spec_orig;
+	char *token;
+	int ret = 0;
 
-	for (level = 0; level < VMPRESSURE_NUM_LEVELS; level++) {
-		if (!strcmp(vmpressure_str_levels[level], args))
-			break;
+	spec_orig = spec = kzalloc(MAX_VMPRESSURE_ARGS_LEN + 1, GFP_KERNEL);
+	if (!spec) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	strncpy(spec, args, MAX_VMPRESSURE_ARGS_LEN);
+
+	/* Find required level */
+	token = strsep(&spec, ",");
+	level = str_to_level(token);
+	if (level == -1) {
+		ret = -EINVAL;
+		goto out;
 	}
 
-	if (level >= VMPRESSURE_NUM_LEVELS)
-		return -EINVAL;
+	/* Find optional mode */
+	token = strsep(&spec, ",");
+	if (token) {
+		mode = str_to_mode(token);
+		if (mode == -1) {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
 
 	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
-	if (!ev)
-		return -ENOMEM;
+	if (!ev) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	ev->efd = eventfd;
 	ev->level = level;
+	ev->mode = mode;
 
 	mutex_lock(&vmpr->events_lock);
 	list_add(&ev->node, &vmpr->events);
 	mutex_unlock(&vmpr->events_lock);
-
-	return 0;
+out:
+	kfree(spec_orig);
+	return ret;
 }
 
 /**

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9e95faf..a1af041 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c

@@ -2228,8 +2228,17 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 		}
 
 		if (unlikely(pgdatfile + pgdatfree <= total_high_wmark)) {
-			scan_balance = SCAN_ANON;
-			goto out;
+			/*
+			 * Force SCAN_ANON if there are enough inactive
+			 * anonymous pages on the LRU in eligible zones.
+			 * Otherwise, the small LRU gets thrashed.
+			 */
+			if (!inactive_list_is_low(lruvec, false, memcg, sc, false) &&
+			    lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx)
+					>> sc->priority) {
+				scan_balance = SCAN_ANON;
+				goto out;
+			}
 		}
 	}
 
@@ -2497,18 +2506,18 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
 		return false;
 
 	/* Consider stopping depending on scan and reclaim activity */
-	if (sc->gfp_mask & __GFP_REPEAT) {
+	if (sc->gfp_mask & __GFP_RETRY_MAYFAIL) {
 		/*
-		 * For __GFP_REPEAT allocations, stop reclaiming if the
+		 * For __GFP_RETRY_MAYFAIL allocations, stop reclaiming if the
 		 * full LRU list has been scanned and we are still failing
 		 * to reclaim pages. This full LRU scan is potentially
-		 * expensive but a __GFP_REPEAT caller really wants to succeed
+		 * expensive but a __GFP_RETRY_MAYFAIL caller really wants to succeed
 		 */
 		if (!nr_reclaimed && !nr_scanned)
 			return false;
 	} else {
 		/*
-		 * For non-__GFP_REPEAT allocations which can presumably
+		 * For non-__GFP_RETRY_MAYFAIL allocations which can presumably
 		 * fail without consequence, stop if we failed to reclaim
 		 * any pages from the last SWAP_CLUSTER_MAX number of
 		 * pages that were scanned. This will return to the

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 744ceae..9a4441b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c

@@ -1130,7 +1130,7 @@ static void frag_stop(struct seq_file *m, void *arg)
  * If @assert_populated is true, only use callback for zones that are populated.
  */
 static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
-		bool assert_populated,
+		bool assert_populated, bool nolock,
 		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
 {
 	struct zone *zone;
@@ -1141,9 +1141,11 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
 		if (assert_populated && !populated_zone(zone))
 			continue;
 
-		spin_lock_irqsave(&zone->lock, flags);
+		if (!nolock)
+			spin_lock_irqsave(&zone->lock, flags);
 		print(m, pgdat, zone);
-		spin_unlock_irqrestore(&zone->lock, flags);
+		if (!nolock)
+			spin_unlock_irqrestore(&zone->lock, flags);
 	}
 }
 #endif
@@ -1166,7 +1168,7 @@ static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
 static int frag_show(struct seq_file *m, void *arg)
 {
 	pg_data_t *pgdat = (pg_data_t *)arg;
-	walk_zones_in_node(m, pgdat, true, frag_show_print);
+	walk_zones_in_node(m, pgdat, true, false, frag_show_print);
 	return 0;
 }
 
@@ -1207,7 +1209,7 @@ static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
 		seq_printf(m, "%6d ", order);
 	seq_putc(m, '\n');
 
-	walk_zones_in_node(m, pgdat, true, pagetypeinfo_showfree_print);
+	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
 
 	return 0;
 }
@@ -1258,7 +1260,8 @@ static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
 	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
 		seq_printf(m, "%12s ", migratetype_names[mtype]);
 	seq_putc(m, '\n');
-	walk_zones_in_node(m, pgdat, true, pagetypeinfo_showblockcount_print);
+	walk_zones_in_node(m, pgdat, true, false,
+		pagetypeinfo_showblockcount_print);
 
 	return 0;
 }
@@ -1284,7 +1287,8 @@ static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
 		seq_printf(m, "%12s ", migratetype_names[mtype]);
 	seq_putc(m, '\n');
 
-	walk_zones_in_node(m, pgdat, true, pagetypeinfo_showmixedcount_print);
+	walk_zones_in_node(m, pgdat, true, true,
+		pagetypeinfo_showmixedcount_print);
 #endif /* CONFIG_PAGE_OWNER */
 }
 
@@ -1446,7 +1450,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 static int zoneinfo_show(struct seq_file *m, void *arg)
 {
 	pg_data_t *pgdat = (pg_data_t *)arg;
-	walk_zones_in_node(m, pgdat, false, zoneinfo_show_print);
+	walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
 	return 0;
 }
 
@@ -1852,7 +1856,7 @@ static int unusable_show(struct seq_file *m, void *arg)
 	if (!node_state(pgdat->node_id, N_MEMORY))
 		return 0;
 
-	walk_zones_in_node(m, pgdat, true, unusable_show_print);
+	walk_zones_in_node(m, pgdat, true, false, unusable_show_print);
 
 	return 0;
 }
@@ -1904,7 +1908,7 @@ static int extfrag_show(struct seq_file *m, void *arg)
 {
 	pg_data_t *pgdat = (pg_data_t *)arg;
 
-	walk_zones_in_node(m, pgdat, true, extfrag_show_print);
+	walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);
 
 	return 0;
 }

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index d41edd2..013eea7 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c

@@ -116,6 +116,11 @@
 #define OBJ_INDEX_BITS	(BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
 #define OBJ_INDEX_MASK	((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
 
+#define FULLNESS_BITS	2
+#define CLASS_BITS	8
+#define ISOLATED_BITS	3
+#define MAGIC_VAL_BITS	8
+
 #define MAX(a, b) ((a) >= (b) ? (a) : (b))
 /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
 #define ZS_MIN_ALLOC_SIZE \
@@ -137,6 +142,8 @@
  *  (reason above)
  */
 #define ZS_SIZE_CLASS_DELTA	(PAGE_SIZE >> CLASS_BITS)
+#define ZS_SIZE_CLASSES	(DIV_ROUND_UP(ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE, \
+				      ZS_SIZE_CLASS_DELTA) + 1)
 
 enum fullness_group {
 	ZS_EMPTY,
@@ -169,11 +176,6 @@ static struct vfsmount *zsmalloc_mnt;
 #endif
 
 /*
- * number of size_classes
- */
-static int zs_size_classes;
-
-/*
  * We assign a page to ZS_ALMOST_EMPTY fullness group when:
  *	n <= N / f, where
  * n = number of allocated objects
@@ -244,7 +246,7 @@ struct link_free {
 struct zs_pool {
 	const char *name;
 
-	struct size_class **size_class;
+	struct size_class *size_class[ZS_SIZE_CLASSES];
 	struct kmem_cache *handle_cachep;
 	struct kmem_cache *zspage_cachep;
 
@@ -268,11 +270,6 @@ struct zs_pool {
 #endif
 };
 
-#define FULLNESS_BITS	2
-#define CLASS_BITS	8
-#define ISOLATED_BITS	3
-#define MAGIC_VAL_BITS	8
-
 struct zspage {
 	struct {
 		unsigned int fullness:FULLNESS_BITS;
@@ -469,7 +466,7 @@ static bool is_zspage_isolated(struct zspage *zspage)
 	return zspage->isolated;
 }
 
-static int is_first_page(struct page *page)
+static __maybe_unused int is_first_page(struct page *page)
 {
 	return PagePrivate(page);
 }
@@ -551,7 +548,7 @@ static int get_size_class_index(int size)
 		idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,
 				ZS_SIZE_CLASS_DELTA);
 
-	return min(zs_size_classes - 1, idx);
+	return min_t(int, ZS_SIZE_CLASSES - 1, idx);
 }
 
 static inline void zs_stat_inc(struct size_class *class,
@@ -610,7 +607,7 @@ static int zs_stats_size_show(struct seq_file *s, void *v)
 			"obj_allocated", "obj_used", "pages_used",
 			"pages_per_zspage", "freeable");
 
-	for (i = 0; i < zs_size_classes; i++) {
+	for (i = 0; i < ZS_SIZE_CLASSES; i++) {
 		class = pool->size_class[i];
 
 		if (class->index != i)
@@ -1294,17 +1291,6 @@ static int zs_cpu_dead(unsigned int cpu)
 	return 0;
 }
 
-static void __init init_zs_size_classes(void)
-{
-	int nr;
-
-	nr = (ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / ZS_SIZE_CLASS_DELTA + 1;
-	if ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) % ZS_SIZE_CLASS_DELTA)
-		nr += 1;
-
-	zs_size_classes = nr;
-}
-
 static bool can_merge(struct size_class *prev, int pages_per_zspage,
 					int objs_per_zspage)
 {
@@ -2145,7 +2131,7 @@ static void async_free_zspage(struct work_struct *work)
 	struct zs_pool *pool = container_of(work, struct zs_pool,
 					free_work);
 
-	for (i = 0; i < zs_size_classes; i++) {
+	for (i = 0; i < ZS_SIZE_CLASSES; i++) {
 		class = pool->size_class[i];
 		if (class->index != i)
 			continue;
@@ -2263,7 +2249,7 @@ unsigned long zs_compact(struct zs_pool *pool)
 	int i;
 	struct size_class *class;
 
-	for (i = zs_size_classes - 1; i >= 0; i--) {
+	for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
 		class = pool->size_class[i];
 		if (!class)
 			continue;
@@ -2309,7 +2295,7 @@ static unsigned long zs_shrinker_count(struct shrinker *shrinker,
 	struct zs_pool *pool = container_of(shrinker, struct zs_pool,
 			shrinker);
 
-	for (i = zs_size_classes - 1; i >= 0; i--) {
+	for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
 		class = pool->size_class[i];
 		if (!class)
 			continue;
@@ -2361,12 +2347,6 @@ struct zs_pool *zs_create_pool(const char *name)
 		return NULL;
 
 	init_deferred_free(pool);
-	pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),
-			GFP_KERNEL);
-	if (!pool->size_class) {
-		kfree(pool);
-		return NULL;
-	}
 
 	pool->name = kstrdup(name, GFP_KERNEL);
 	if (!pool->name)
@@ -2379,7 +2359,7 @@ struct zs_pool *zs_create_pool(const char *name)
 	 * Iterate reversely, because, size of size_class that we want to use
 	 * for merging should be larger or equal to current size.
 	 */
-	for (i = zs_size_classes - 1; i >= 0; i--) {
+	for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
 		int size;
 		int pages_per_zspage;
 		int objs_per_zspage;
@@ -2453,7 +2433,7 @@ void zs_destroy_pool(struct zs_pool *pool)
 	zs_unregister_migration(pool);
 	zs_pool_stat_destroy(pool);
 
-	for (i = 0; i < zs_size_classes; i++) {
+	for (i = 0; i < ZS_SIZE_CLASSES; i++) {
 		int fg;
 		struct size_class *class = pool->size_class[i];
 
@@ -2492,8 +2472,6 @@ static int __init zs_init(void)
 	if (ret)
 		goto hp_setup_fail;
 
-	init_zs_size_classes();
-
 #ifdef CONFIG_ZPOOL
 	zpool_register_driver(&zs_zpool_driver);
 #endif

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 09dcdb9..a0b11e7 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c

@@ -323,7 +323,8 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
 			__mdb_entry_to_br_ip(entry, &complete_info->ip);
 			mdb.obj.complete_priv = complete_info;
 			mdb.obj.complete = br_mdb_complete;
-			switchdev_port_obj_add(port_dev, &mdb.obj);
+			if (switchdev_port_obj_add(port_dev, &mdb.obj))
+				kfree(complete_info);
 		}
 	} else if (port_dev && type == RTM_DELMDB) {
 		switchdev_port_obj_del(port_dev, &mdb.obj);

diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 47e94b5..3d265c5 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c

@@ -85,6 +85,7 @@ const char *ceph_msg_type_name(int type)
 	case CEPH_MSG_OSD_OP: return "osd_op";
 	case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
 	case CEPH_MSG_WATCH_NOTIFY: return "watch_notify";
+	case CEPH_MSG_OSD_BACKOFF: return "osd_backoff";
 	default: return "unknown";
 	}
 }

diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 5bf94c04..4b428f4 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c

@@ -1,6 +1,7 @@
 #ifdef __KERNEL__
 # include <linux/slab.h>
 # include <linux/crush/crush.h>
+void clear_choose_args(struct crush_map *c);
 #else
 # include "crush_compat.h"
 # include "crush.h"
@@ -127,6 +128,8 @@ void crush_destroy(struct crush_map *map)
 
 #ifndef __KERNEL__
 	kfree(map->choose_tries);
+#else
+	clear_choose_args(map);
 #endif
 	kfree(map);
 }

diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index b5cd8c2..746b145 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c

@@ -302,19 +302,42 @@ static __u64 crush_ln(unsigned int xin)
  *
  */
 
+static __u32 *get_choose_arg_weights(const struct crush_bucket_straw2 *bucket,
+				     const struct crush_choose_arg *arg,
+				     int position)
+{
+	if (!arg || !arg->weight_set || arg->weight_set_size == 0)
+		return bucket->item_weights;
+
+	if (position >= arg->weight_set_size)
+		position = arg->weight_set_size - 1;
+	return arg->weight_set[position].weights;
+}
+
+static __s32 *get_choose_arg_ids(const struct crush_bucket_straw2 *bucket,
+				 const struct crush_choose_arg *arg)
+{
+	if (!arg || !arg->ids)
+		return bucket->h.items;
+
+	return arg->ids;
+}
+
 static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
-				int x, int r)
+				int x, int r,
+				const struct crush_choose_arg *arg,
+				int position)
 {
 	unsigned int i, high = 0;
 	unsigned int u;
-	unsigned int w;
 	__s64 ln, draw, high_draw = 0;
+	__u32 *weights = get_choose_arg_weights(bucket, arg, position);
+	__s32 *ids = get_choose_arg_ids(bucket, arg);
 
 	for (i = 0; i < bucket->h.size; i++) {
-		w = bucket->item_weights[i];
-		if (w) {
-			u = crush_hash32_3(bucket->h.hash, x,
-					   bucket->h.items[i], r);
+		dprintk("weight 0x%x item %d\n", weights[i], ids[i]);
+		if (weights[i]) {
+			u = crush_hash32_3(bucket->h.hash, x, ids[i], r);
 			u &= 0xffff;
 
 			/*
@@ -335,7 +358,7 @@ static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
 			 * weight means a larger (less negative) value
 			 * for draw.
 			 */
-			draw = div64_s64(ln, w);
+			draw = div64_s64(ln, weights[i]);
 		} else {
 			draw = S64_MIN;
 		}
@@ -352,7 +375,9 @@ static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
 
 static int crush_bucket_choose(const struct crush_bucket *in,
 			       struct crush_work_bucket *work,
-			       int x, int r)
+			       int x, int r,
+			       const struct crush_choose_arg *arg,
+			       int position)
 {
 	dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
 	BUG_ON(in->size == 0);
@@ -374,7 +399,7 @@ static int crush_bucket_choose(const struct crush_bucket *in,
 	case CRUSH_BUCKET_STRAW2:
 		return bucket_straw2_choose(
 			(const struct crush_bucket_straw2 *)in,
-			x, r);
+			x, r, arg, position);
 	default:
 		dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
 		return in->items[0];
@@ -436,7 +461,8 @@ static int crush_choose_firstn(const struct crush_map *map,
 			       unsigned int vary_r,
 			       unsigned int stable,
 			       int *out2,
-			       int parent_r)
+			       int parent_r,
+			       const struct crush_choose_arg *choose_args)
 {
 	int rep;
 	unsigned int ftotal, flocal;
@@ -486,7 +512,10 @@ static int crush_choose_firstn(const struct crush_map *map,
 				else
 					item = crush_bucket_choose(
 						in, work->work[-1-in->id],
-						x, r);
+						x, r,
+						(choose_args ?
+						 &choose_args[-1-in->id] : 0),
+						outpos);
 				if (item >= map->max_devices) {
 					dprintk("   bad item %d\n", item);
 					skip_rep = 1;
@@ -543,7 +572,8 @@ static int crush_choose_firstn(const struct crush_map *map,
 							    vary_r,
 							    stable,
 							    NULL,
-							    sub_r) <= outpos)
+							    sub_r,
+							    choose_args) <= outpos)
 							/* didn't get leaf */
 							reject = 1;
 					} else {
@@ -620,7 +650,8 @@ static void crush_choose_indep(const struct crush_map *map,
 			       unsigned int recurse_tries,
 			       int recurse_to_leaf,
 			       int *out2,
-			       int parent_r)
+			       int parent_r,
+			       const struct crush_choose_arg *choose_args)
 {
 	const struct crush_bucket *in = bucket;
 	int endpos = outpos + left;
@@ -692,7 +723,10 @@ static void crush_choose_indep(const struct crush_map *map,
 
 				item = crush_bucket_choose(
 					in, work->work[-1-in->id],
-					x, r);
+					x, r,
+					(choose_args ?
+					 &choose_args[-1-in->id] : 0),
+					outpos);
 				if (item >= map->max_devices) {
 					dprintk("   bad item %d\n", item);
 					out[rep] = CRUSH_ITEM_NONE;
@@ -746,7 +780,8 @@ static void crush_choose_indep(const struct crush_map *map,
 							x, 1, numrep, 0,
 							out2, rep,
 							recurse_tries, 0,
-							0, NULL, r);
+							0, NULL, r,
+							choose_args);
 						if (out2[rep] == CRUSH_ITEM_NONE) {
 							/* placed nothing; no leaf */
 							break;
@@ -823,7 +858,7 @@ void crush_init_workspace(const struct crush_map *map, void *v)
 	 * set the pointer first and then reserve the space for it to
 	 * point to by incrementing the point.
 	 */
-	v += sizeof(struct crush_work *);
+	v += sizeof(struct crush_work);
 	w->work = v;
 	v += map->max_buckets * sizeof(struct crush_work_bucket *);
 	for (b = 0; b < map->max_buckets; ++b) {
@@ -854,11 +889,12 @@ void crush_init_workspace(const struct crush_map *map, void *v)
  * @weight: weight vector (for map leaves)
  * @weight_max: size of weight vector
  * @cwin: pointer to at least crush_work_size() bytes of memory
+ * @choose_args: weights and ids for each known bucket
  */
 int crush_do_rule(const struct crush_map *map,
 		  int ruleno, int x, int *result, int result_max,
 		  const __u32 *weight, int weight_max,
-		  void *cwin)
+		  void *cwin, const struct crush_choose_arg *choose_args)
 {
 	int result_len;
 	struct crush_work *cw = cwin;
@@ -968,11 +1004,6 @@ int crush_do_rule(const struct crush_map *map,
 
 			for (i = 0; i < wsize; i++) {
 				int bno;
-				/*
-				 * see CRUSH_N, CRUSH_N_MINUS macros.
-				 * basically, numrep <= 0 means relative to
-				 * the provided result_max
-				 */
 				numrep = curstep->arg1;
 				if (numrep <= 0) {
 					numrep += result_max;
@@ -1013,7 +1044,8 @@ int crush_do_rule(const struct crush_map *map,
 						vary_r,
 						stable,
 						c+osize,
-						0);
+						0,
+						choose_args);
 				} else {
 					out_size = ((numrep < (result_max-osize)) ?
 						    numrep : (result_max-osize));
@@ -1030,7 +1062,8 @@ int crush_do_rule(const struct crush_map *map,
 						   choose_leaf_tries : 1,
 						recurse_to_leaf,
 						c+osize,
-						0);
+						0,
+						choose_args);
 					osize += out_size;
 				}
 			}

diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 71ba139..fa5233e 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c

@@ -77,7 +77,7 @@ static int osdmap_show(struct seq_file *s, void *p)
 	}
 	for (i = 0; i < map->max_osd; i++) {
 		struct ceph_entity_addr *addr = &map->osd_addr[i];
-		int state = map->osd_state[i];
+		u32 state = map->osd_state[i];
 		char sb[64];
 
 		seq_printf(s, "osd%d\t%s\t%3d%%\t(%s)\t%3d%%\n",
@@ -104,6 +104,29 @@ static int osdmap_show(struct seq_file *s, void *p)
 		seq_printf(s, "primary_temp %llu.%x %d\n", pg->pgid.pool,
 			   pg->pgid.seed, pg->primary_temp.osd);
 	}
+	for (n = rb_first(&map->pg_upmap); n; n = rb_next(n)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(n, struct ceph_pg_mapping, node);
+
+		seq_printf(s, "pg_upmap %llu.%x [", pg->pgid.pool,
+			   pg->pgid.seed);
+		for (i = 0; i < pg->pg_upmap.len; i++)
+			seq_printf(s, "%s%d", (i == 0 ? "" : ","),
+				   pg->pg_upmap.osds[i]);
+		seq_printf(s, "]\n");
+	}
+	for (n = rb_first(&map->pg_upmap_items); n; n = rb_next(n)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(n, struct ceph_pg_mapping, node);
+
+		seq_printf(s, "pg_upmap_items %llu.%x [", pg->pgid.pool,
+			   pg->pgid.seed);
+		for (i = 0; i < pg->pg_upmap_items.len; i++)
+			seq_printf(s, "%s%d->%d", (i == 0 ? "" : ","),
+				   pg->pg_upmap_items.from_to[i][0],
+				   pg->pg_upmap_items.from_to[i][1]);
+		seq_printf(s, "]\n");
+	}
 
 	up_read(&osdc->lock);
 	return 0;
@@ -147,17 +170,26 @@ static int monc_show(struct seq_file *s, void *p)
 	return 0;
 }
 
+static void dump_spgid(struct seq_file *s, const struct ceph_spg *spgid)
+{
+	seq_printf(s, "%llu.%x", spgid->pgid.pool, spgid->pgid.seed);
+	if (spgid->shard != CEPH_SPG_NOSHARD)
+		seq_printf(s, "s%d", spgid->shard);
+}
+
 static void dump_target(struct seq_file *s, struct ceph_osd_request_target *t)
 {
 	int i;
 
-	seq_printf(s, "osd%d\t%llu.%x\t[", t->osd, t->pgid.pool, t->pgid.seed);
+	seq_printf(s, "osd%d\t%llu.%x\t", t->osd, t->pgid.pool, t->pgid.seed);
+	dump_spgid(s, &t->spgid);
+	seq_puts(s, "\t[");
 	for (i = 0; i < t->up.size; i++)
 		seq_printf(s, "%s%d", (!i ? "" : ","), t->up.osds[i]);
 	seq_printf(s, "]/%d\t[", t->up.primary);
 	for (i = 0; i < t->acting.size; i++)
 		seq_printf(s, "%s%d", (!i ? "" : ","), t->acting.osds[i]);
-	seq_printf(s, "]/%d\t", t->acting.primary);
+	seq_printf(s, "]/%d\te%u\t", t->acting.primary, t->epoch);
 	if (t->target_oloc.pool_ns) {
 		seq_printf(s, "%*pE/%*pE\t0x%x",
 			(int)t->target_oloc.pool_ns->len,
@@ -234,6 +266,73 @@ static void dump_linger_requests(struct seq_file *s, struct ceph_osd *osd)
 	mutex_unlock(&osd->lock);
 }
 
+static void dump_snapid(struct seq_file *s, u64 snapid)
+{
+	if (snapid == CEPH_NOSNAP)
+		seq_puts(s, "head");
+	else if (snapid == CEPH_SNAPDIR)
+		seq_puts(s, "snapdir");
+	else
+		seq_printf(s, "%llx", snapid);
+}
+
+static void dump_name_escaped(struct seq_file *s, unsigned char *name,
+			      size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		if (name[i] == '%' || name[i] == ':' || name[i] == '/' ||
+		    name[i] < 32 || name[i] >= 127) {
+			seq_printf(s, "%%%02x", name[i]);
+		} else {
+			seq_putc(s, name[i]);
+		}
+	}
+}
+
+static void dump_hoid(struct seq_file *s, const struct ceph_hobject_id *hoid)
+{
+	if (hoid->snapid == 0 && hoid->hash == 0 && !hoid->is_max &&
+	    hoid->pool == S64_MIN) {
+		seq_puts(s, "MIN");
+		return;
+	}
+	if (hoid->is_max) {
+		seq_puts(s, "MAX");
+		return;
+	}
+	seq_printf(s, "%lld:%08x:", hoid->pool, hoid->hash_reverse_bits);
+	dump_name_escaped(s, hoid->nspace, hoid->nspace_len);
+	seq_putc(s, ':');
+	dump_name_escaped(s, hoid->key, hoid->key_len);
+	seq_putc(s, ':');
+	dump_name_escaped(s, hoid->oid, hoid->oid_len);
+	seq_putc(s, ':');
+	dump_snapid(s, hoid->snapid);
+}
+
+static void dump_backoffs(struct seq_file *s, struct ceph_osd *osd)
+{
+	struct rb_node *n;
+
+	mutex_lock(&osd->lock);
+	for (n = rb_first(&osd->o_backoffs_by_id); n; n = rb_next(n)) {
+		struct ceph_osd_backoff *backoff =
+		    rb_entry(n, struct ceph_osd_backoff, id_node);
+
+		seq_printf(s, "osd%d\t", osd->o_osd);
+		dump_spgid(s, &backoff->spgid);
+		seq_printf(s, "\t%llu\t", backoff->id);
+		dump_hoid(s, backoff->begin);
+		seq_putc(s, '\t');
+		dump_hoid(s, backoff->end);
+		seq_putc(s, '\n');
+	}
+
+	mutex_unlock(&osd->lock);
+}
+
 static int osdc_show(struct seq_file *s, void *pp)
 {
 	struct ceph_client *client = s->private;
@@ -259,6 +358,13 @@ static int osdc_show(struct seq_file *s, void *pp)
 	}
 	dump_linger_requests(s, &osdc->homeless_osd);
 
+	seq_puts(s, "BACKOFFS\n");
+	for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
+		struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
+
+		dump_backoffs(s, osd);
+	}
+
 	up_read(&osdc->lock);
 	return 0;
 }

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 588a919..0c31035 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c

@@ -1288,13 +1288,16 @@ static void prepare_write_message(struct ceph_connection *con)
 		m->hdr.seq = cpu_to_le64(++con->out_seq);
 		m->needs_out_seq = false;
 	}
-	WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len));
+
+	if (con->ops->reencode_message)
+		con->ops->reencode_message(m);
 
 	dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
 	     m, con->out_seq, le16_to_cpu(m->hdr.type),
 	     le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len),
 	     m->data_length);
-	BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
+	WARN_ON(m->front.iov_len != le32_to_cpu(m->hdr.front_len));
+	WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len));
 
 	/* tag + hdr + front + middle */
 	con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
@@ -2033,8 +2036,7 @@ static int process_connect(struct ceph_connection *con)
 {
 	u64 sup_feat = from_msgr(con->msgr)->supported_features;
 	u64 req_feat = from_msgr(con->msgr)->required_features;
-	u64 server_feat = ceph_sanitize_features(
-				le64_to_cpu(con->in_reply.features));
+	u64 server_feat = le64_to_cpu(con->in_reply.features);
 	int ret;
 
 	dout("process_connect on %p tag %d\n", con, (int)con->in_tag);

diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 250f11f..8756757 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c

@@ -6,6 +6,7 @@
 #include <linux/random.h>
 #include <linux/sched.h>
 
+#include <linux/ceph/ceph_features.h>
 #include <linux/ceph/mon_client.h>
 #include <linux/ceph/libceph.h>
 #include <linux/ceph/debugfs.h>
@@ -297,6 +298,10 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
 
 	mutex_lock(&monc->mutex);
 	if (monc->sub_renew_sent) {
+		/*
+		 * This is only needed for legacy (infernalis or older)
+		 * MONs -- see delayed_work().
+		 */
 		monc->sub_renew_after = monc->sub_renew_sent +
 					    (seconds >> 1) * HZ - 1;
 		dout("%s sent %lu duration %d renew after %lu\n", __func__,
@@ -955,7 +960,8 @@ static void delayed_work(struct work_struct *work)
 			__validate_auth(monc);
 		}
 
-		if (is_auth) {
+		if (is_auth &&
+		    !(monc->con.peer_features & CEPH_FEATURE_MON_STATEFUL_SUB)) {
 			unsigned long now = jiffies;
 
 			dout("%s renew subs? now %lu renew after %lu\n",

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 924f07c..86a9737 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c

@@ -12,6 +12,7 @@
 #include <linux/bio.h>
 #endif
 
+#include <linux/ceph/ceph_features.h>
 #include <linux/ceph/libceph.h>
 #include <linux/ceph/osd_client.h>
 #include <linux/ceph/messenger.h>
@@ -49,6 +50,7 @@ static void link_linger(struct ceph_osd *osd,
 			struct ceph_osd_linger_request *lreq);
 static void unlink_linger(struct ceph_osd *osd,
 			  struct ceph_osd_linger_request *lreq);
+static void clear_backoffs(struct ceph_osd *osd);
 
 #if 1
 static inline bool rwsem_is_wrlocked(struct rw_semaphore *sem)
@@ -373,6 +375,7 @@ static void target_copy(struct ceph_osd_request_target *dest,
 	ceph_oloc_copy(&dest->target_oloc, &src->target_oloc);
 
 	dest->pgid = src->pgid; /* struct */
+	dest->spgid = src->spgid; /* struct */
 	dest->pg_num = src->pg_num;
 	dest->pg_num_mask = src->pg_num_mask;
 	ceph_osds_copy(&dest->acting, &src->acting);
@@ -384,6 +387,9 @@ static void target_copy(struct ceph_osd_request_target *dest,
 	dest->flags = src->flags;
 	dest->paused = src->paused;
 
+	dest->epoch = src->epoch;
+	dest->last_force_resend = src->last_force_resend;
+
 	dest->osd = src->osd;
 }
 
@@ -537,7 +543,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 }
 EXPORT_SYMBOL(ceph_osdc_alloc_request);
 
-static int ceph_oloc_encoding_size(struct ceph_object_locator *oloc)
+static int ceph_oloc_encoding_size(const struct ceph_object_locator *oloc)
 {
 	return 8 + 4 + 4 + 4 + (oloc->pool_ns ? oloc->pool_ns->len : 0);
 }
@@ -552,17 +558,21 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
 	WARN_ON(ceph_oloc_empty(&req->r_base_oloc));
 
 	/* create request message */
-	msg_size = 4 + 4 + 4; /* client_inc, osdmap_epoch, flags */
-	msg_size += 4 + 4 + 4 + 8; /* mtime, reassert_version */
+	msg_size = CEPH_ENCODING_START_BLK_LEN +
+			CEPH_PGID_ENCODING_LEN + 1; /* spgid */
+	msg_size += 4 + 4 + 4; /* hash, osdmap_epoch, flags */
+	msg_size += CEPH_ENCODING_START_BLK_LEN +
+			sizeof(struct ceph_osd_reqid); /* reqid */
+	msg_size += sizeof(struct ceph_blkin_trace_info); /* trace */
+	msg_size += 4 + sizeof(struct ceph_timespec); /* client_inc, mtime */
 	msg_size += CEPH_ENCODING_START_BLK_LEN +
 			ceph_oloc_encoding_size(&req->r_base_oloc); /* oloc */
-	msg_size += 1 + 8 + 4 + 4; /* pgid */
 	msg_size += 4 + req->r_base_oid.name_len; /* oid */
 	msg_size += 2 + req->r_num_ops * sizeof(struct ceph_osd_op);
 	msg_size += 8; /* snapid */
 	msg_size += 8; /* snap_seq */
 	msg_size += 4 + 8 * (req->r_snapc ? req->r_snapc->num_snaps : 0);
-	msg_size += 4; /* retry_attempt */
+	msg_size += 4 + 8; /* retry_attempt, features */
 
 	if (req->r_mempool)
 		msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
@@ -1010,6 +1020,8 @@ static void osd_init(struct ceph_osd *osd)
 	RB_CLEAR_NODE(&osd->o_node);
 	osd->o_requests = RB_ROOT;
 	osd->o_linger_requests = RB_ROOT;
+	osd->o_backoff_mappings = RB_ROOT;
+	osd->o_backoffs_by_id = RB_ROOT;
 	INIT_LIST_HEAD(&osd->o_osd_lru);
 	INIT_LIST_HEAD(&osd->o_keepalive_item);
 	osd->o_incarnation = 1;
@@ -1021,6 +1033,8 @@ static void osd_cleanup(struct ceph_osd *osd)
 	WARN_ON(!RB_EMPTY_NODE(&osd->o_node));
 	WARN_ON(!RB_EMPTY_ROOT(&osd->o_requests));
 	WARN_ON(!RB_EMPTY_ROOT(&osd->o_linger_requests));
+	WARN_ON(!RB_EMPTY_ROOT(&osd->o_backoff_mappings));
+	WARN_ON(!RB_EMPTY_ROOT(&osd->o_backoffs_by_id));
 	WARN_ON(!list_empty(&osd->o_osd_lru));
 	WARN_ON(!list_empty(&osd->o_keepalive_item));
 
@@ -1141,6 +1155,7 @@ static void close_osd(struct ceph_osd *osd)
 		unlink_linger(osd, lreq);
 		link_linger(&osdc->homeless_osd, lreq);
 	}
+	clear_backoffs(osd);
 
 	__remove_osd_from_lru(osd);
 	erase_osd(&osdc->osds, osd);
@@ -1297,7 +1312,7 @@ static bool target_should_be_paused(struct ceph_osd_client *osdc,
 		       ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) ||
 		       __pool_full(pi);
 
-	WARN_ON(pi->id != t->base_oloc.pool);
+	WARN_ON(pi->id != t->target_oloc.pool);
 	return ((t->flags & CEPH_OSD_FLAG_READ) && pauserd) ||
 	       ((t->flags & CEPH_OSD_FLAG_WRITE) && pausewr) ||
 	       (osdc->osdmap->epoch < osdc->epoch_barrier);
@@ -1311,19 +1326,21 @@ enum calc_target_result {
 
 static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 					   struct ceph_osd_request_target *t,
-					   u32 *last_force_resend,
+					   struct ceph_connection *con,
 					   bool any_change)
 {
 	struct ceph_pg_pool_info *pi;
 	struct ceph_pg pgid, last_pgid;
 	struct ceph_osds up, acting;
 	bool force_resend = false;
-	bool need_check_tiering = false;
-	bool need_resend = false;
+	bool unpaused = false;
+	bool legacy_change;
+	bool split = false;
 	bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE);
 	enum calc_target_result ct_res;
 	int ret;
 
+	t->epoch = osdc->osdmap->epoch;
 	pi = ceph_pg_pool_by_id(osdc->osdmap, t->base_oloc.pool);
 	if (!pi) {
 		t->osd = CEPH_HOMELESS_OSD;
@@ -1332,33 +1349,33 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	}
 
 	if (osdc->osdmap->epoch == pi->last_force_request_resend) {
-		if (last_force_resend &&
-		    *last_force_resend < pi->last_force_request_resend) {
-			*last_force_resend = pi->last_force_request_resend;
+		if (t->last_force_resend < pi->last_force_request_resend) {
+			t->last_force_resend = pi->last_force_request_resend;
 			force_resend = true;
-		} else if (!last_force_resend) {
+		} else if (t->last_force_resend == 0) {
 			force_resend = true;
 		}
 	}
-	if (ceph_oid_empty(&t->target_oid) || force_resend) {
-		ceph_oid_copy(&t->target_oid, &t->base_oid);
-		need_check_tiering = true;
-	}
-	if (ceph_oloc_empty(&t->target_oloc) || force_resend) {
-		ceph_oloc_copy(&t->target_oloc, &t->base_oloc);
-		need_check_tiering = true;
-	}
 
-	if (need_check_tiering &&
-	    (t->flags & CEPH_OSD_FLAG_IGNORE_OVERLAY) == 0) {
+	/* apply tiering */
+	ceph_oid_copy(&t->target_oid, &t->base_oid);
+	ceph_oloc_copy(&t->target_oloc, &t->base_oloc);
+	if ((t->flags & CEPH_OSD_FLAG_IGNORE_OVERLAY) == 0) {
 		if (t->flags & CEPH_OSD_FLAG_READ && pi->read_tier >= 0)
 			t->target_oloc.pool = pi->read_tier;
 		if (t->flags & CEPH_OSD_FLAG_WRITE && pi->write_tier >= 0)
 			t->target_oloc.pool = pi->write_tier;
+
+		pi = ceph_pg_pool_by_id(osdc->osdmap, t->target_oloc.pool);
+		if (!pi) {
+			t->osd = CEPH_HOMELESS_OSD;
+			ct_res = CALC_TARGET_POOL_DNE;
+			goto out;
+		}
 	}
 
-	ret = ceph_object_locator_to_pg(osdc->osdmap, &t->target_oid,
-					&t->target_oloc, &pgid);
+	ret = __ceph_object_locator_to_pg(pi, &t->target_oid, &t->target_oloc,
+					  &pgid);
 	if (ret) {
 		WARN_ON(ret != -ENOENT);
 		t->osd = CEPH_HOMELESS_OSD;
@@ -1368,7 +1385,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	last_pgid.pool = pgid.pool;
 	last_pgid.seed = ceph_stable_mod(pgid.seed, t->pg_num, t->pg_num_mask);
 
-	ceph_pg_to_up_acting_osds(osdc->osdmap, &pgid, &up, &acting);
+	ceph_pg_to_up_acting_osds(osdc->osdmap, pi, &pgid, &up, &acting);
 	if (any_change &&
 	    ceph_is_new_interval(&t->acting,
 				 &acting,
@@ -1387,13 +1404,16 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 
 	if (t->paused && !target_should_be_paused(osdc, t, pi)) {
 		t->paused = false;
-		need_resend = true;
+		unpaused = true;
 	}
+	legacy_change = ceph_pg_compare(&t->pgid, &pgid) ||
+			ceph_osds_changed(&t->acting, &acting, any_change);
+	if (t->pg_num)
+		split = ceph_pg_is_split(&last_pgid, t->pg_num, pi->pg_num);
 
-	if (ceph_pg_compare(&t->pgid, &pgid) ||
-	    ceph_osds_changed(&t->acting, &acting, any_change) ||
-	    force_resend) {
+	if (legacy_change || force_resend || split) {
 		t->pgid = pgid; /* struct */
+		ceph_pg_to_primary_shard(osdc->osdmap, pi, &pgid, &t->spgid);
 		ceph_osds_copy(&t->acting, &acting);
 		ceph_osds_copy(&t->up, &up);
 		t->size = pi->size;
@@ -1403,15 +1423,342 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 		t->sort_bitwise = sort_bitwise;
 
 		t->osd = acting.primary;
-		need_resend = true;
 	}
 
-	ct_res = need_resend ? CALC_TARGET_NEED_RESEND : CALC_TARGET_NO_ACTION;
+	if (unpaused || legacy_change || force_resend ||
+	    (split && con && CEPH_HAVE_FEATURE(con->peer_features,
+					       RESEND_ON_SPLIT)))
+		ct_res = CALC_TARGET_NEED_RESEND;
+	else
+		ct_res = CALC_TARGET_NO_ACTION;
+
 out:
 	dout("%s t %p -> ct_res %d osd %d\n", __func__, t, ct_res, t->osd);
 	return ct_res;
 }
 
+static struct ceph_spg_mapping *alloc_spg_mapping(void)
+{
+	struct ceph_spg_mapping *spg;
+
+	spg = kmalloc(sizeof(*spg), GFP_NOIO);
+	if (!spg)
+		return NULL;
+
+	RB_CLEAR_NODE(&spg->node);
+	spg->backoffs = RB_ROOT;
+	return spg;
+}
+
+static void free_spg_mapping(struct ceph_spg_mapping *spg)
+{
+	WARN_ON(!RB_EMPTY_NODE(&spg->node));
+	WARN_ON(!RB_EMPTY_ROOT(&spg->backoffs));
+
+	kfree(spg);
+}
+
+/*
+ * rbtree of ceph_spg_mapping for handling map<spg_t, ...>, similar to
+ * ceph_pg_mapping.  Used to track OSD backoffs -- a backoff [range] is
+ * defined only within a specific spgid; it does not pass anything to
+ * children on split, or to another primary.
+ */
+DEFINE_RB_FUNCS2(spg_mapping, struct ceph_spg_mapping, spgid, ceph_spg_compare,
+		 RB_BYPTR, const struct ceph_spg *, node)
+
+static u64 hoid_get_bitwise_key(const struct ceph_hobject_id *hoid)
+{
+	return hoid->is_max ? 0x100000000ull : hoid->hash_reverse_bits;
+}
+
+/*
+ * The effective key is ->key when one is set, otherwise the ->oid name.
+ */
+static void hoid_get_effective_key(const struct ceph_hobject_id *hoid,
+				   void **pkey, size_t *pkey_len)
+{
+	bool has_key = hoid->key_len != 0;
+
+	*pkey = has_key ? hoid->key : hoid->oid;
+	*pkey_len = has_key ? hoid->key_len : hoid->oid_len;
+}
+
+/* Order two byte strings: memcmp() on the common prefix, then shorter first. */
+static int compare_names(const void *name1, size_t name1_len,
+			 const void *name2, size_t name2_len)
+{
+	int cmp = memcmp(name1, name2, min(name1_len, name2_len));
+
+	if (cmp)
+		return cmp;
+	if (name1_len < name2_len)
+		return -1;
+	if (name1_len > name2_len)
+		return 1;
+	return 0;
+}
+
+/*
+ * Total order on hobjects.  Fields are compared in turn -- is_max, pool,
+ * bitwise key, namespace, effective key, oid, snapid -- first difference wins.
+ */
+static int hoid_compare(const struct ceph_hobject_id *lhs,
+			const struct ceph_hobject_id *rhs)
+{
+	void *lkey, *rkey;
+	size_t lkey_len, rkey_len;
+	u64 lbits, rbits;
+	int ret;
+
+	if (lhs->is_max != rhs->is_max)
+		return lhs->is_max < rhs->is_max ? -1 : 1;
+
+	if (lhs->pool != rhs->pool)
+		return lhs->pool < rhs->pool ? -1 : 1;
+
+	/* see hoid_get_bitwise_key() for how is_max is folded in */
+	lbits = hoid_get_bitwise_key(lhs);
+	rbits = hoid_get_bitwise_key(rhs);
+	if (lbits != rbits)
+		return lbits < rbits ? -1 : 1;
+
+	ret = compare_names(lhs->nspace, lhs->nspace_len,
+			    rhs->nspace, rhs->nspace_len);
+	if (ret)
+		return ret;
+
+	/* ->key when set, ->oid otherwise */
+	hoid_get_effective_key(lhs, &lkey, &lkey_len);
+	hoid_get_effective_key(rhs, &rkey, &rkey_len);
+	ret = compare_names(lkey, lkey_len, rkey, rkey_len);
+	if (ret)
+		return ret;
+
+	ret = compare_names(lhs->oid, lhs->oid_len, rhs->oid, rhs->oid_len);
+	if (ret)
+		return ret;
+
+	if (lhs->snapid != rhs->snapid)
+		return lhs->snapid < rhs->snapid ? -1 : 1;
+
+	return 0;
+}
+
+/*
+ * For decoding ->begin and ->end of MOSDBackoff only -- no MIN/MAX
+ * compat stuff here.
+ *
+ * Assumes @hoid is zero-initialized.
+ */
+static int decode_hoid(void **p, void *end, struct ceph_hobject_id *hoid)
+{
+	u8 struct_v;
+	u32 struct_len;
+	int ret;
+
+	ret = ceph_start_decoding(p, end, 4, "hobject_t", &struct_v,
+				  &struct_len);
+	if (ret)
+		return ret;
+
+	if (struct_v < 4) {
+		pr_err("got struct_v %d < 4 of hobject_t\n", struct_v);
+		goto e_inval;
+	}
+
+	hoid->key = ceph_extract_encoded_string(p, end, &hoid->key_len,
+						GFP_NOIO);
+	if (IS_ERR(hoid->key)) {
+		ret = PTR_ERR(hoid->key);
+		hoid->key = NULL;
+		return ret;
+	}
+
+	hoid->oid = ceph_extract_encoded_string(p, end, &hoid->oid_len,
+						GFP_NOIO);
+	if (IS_ERR(hoid->oid)) {
+		ret = PTR_ERR(hoid->oid);
+		hoid->oid = NULL;
+		return ret;
+	}
+
+	ceph_decode_64_safe(p, end, hoid->snapid, e_inval);
+	ceph_decode_32_safe(p, end, hoid->hash, e_inval);
+	ceph_decode_8_safe(p, end, hoid->is_max, e_inval);
+
+	hoid->nspace = ceph_extract_encoded_string(p, end, &hoid->nspace_len,
+						   GFP_NOIO);
+	if (IS_ERR(hoid->nspace)) {
+		ret = PTR_ERR(hoid->nspace);
+		hoid->nspace = NULL;
+		return ret;
+	}
+
+	ceph_decode_64_safe(p, end, hoid->pool, e_inval);
+
+	ceph_hoid_build_hash_cache(hoid);
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+static int hoid_encoding_size(const struct ceph_hobject_id *hoid)
+{
+	return 8 + 4 + 1 + 8 + /* snapid, hash, is_max, pool */
+	       4 + hoid->key_len + 4 + hoid->oid_len + 4 + hoid->nspace_len;
+}
+
+static void encode_hoid(void **p, void *end, const struct ceph_hobject_id *hoid)
+{
+	ceph_start_encoding(p, 4, 3, hoid_encoding_size(hoid));
+	ceph_encode_string(p, end, hoid->key, hoid->key_len);
+	ceph_encode_string(p, end, hoid->oid, hoid->oid_len);
+	ceph_encode_64(p, hoid->snapid);
+	ceph_encode_32(p, hoid->hash);
+	ceph_encode_8(p, hoid->is_max);
+	ceph_encode_string(p, end, hoid->nspace, hoid->nspace_len);
+	ceph_encode_64(p, hoid->pool);
+}
+
+static void free_hoid(struct ceph_hobject_id *hoid)
+{
+	if (hoid) {
+		kfree(hoid->key);
+		kfree(hoid->oid);
+		kfree(hoid->nspace);
+		kfree(hoid);
+	}
+}
+
+static struct ceph_osd_backoff *alloc_backoff(void)
+{
+	struct ceph_osd_backoff *backoff;
+
+	backoff = kzalloc(sizeof(*backoff), GFP_NOIO);
+	if (!backoff)
+		return NULL;
+
+	RB_CLEAR_NODE(&backoff->spg_node);
+	RB_CLEAR_NODE(&backoff->id_node);
+	return backoff;
+}
+
+static void free_backoff(struct ceph_osd_backoff *backoff)
+{
+	WARN_ON(!RB_EMPTY_NODE(&backoff->spg_node));
+	WARN_ON(!RB_EMPTY_NODE(&backoff->id_node));
+
+	free_hoid(backoff->begin);
+	free_hoid(backoff->end);
+	kfree(backoff);
+}
+
+/*
+ * Within a specific spgid, backoffs are managed by ->begin hoid.
+ */
+DEFINE_RB_INSDEL_FUNCS2(backoff, struct ceph_osd_backoff, begin, hoid_compare,
+			RB_BYVAL, spg_node);
+
+/* Find a backoff that covers @hoid: hoid == begin or begin < hoid < end. */
+static struct ceph_osd_backoff *lookup_containing_backoff(struct rb_root *root,
+					    const struct ceph_hobject_id *hoid)
+{
+	struct rb_node *node;
+
+	for (node = root->rb_node; node; ) {
+		struct ceph_osd_backoff *backoff =
+		    rb_entry(node, struct ceph_osd_backoff, spg_node);
+		int cmp = hoid_compare(hoid, backoff->begin);
+
+		if (!cmp)
+			return backoff;		/* exact hit on ->begin */
+
+		if (cmp < 0) {
+			node = node->rb_left;
+			continue;
+		}
+		if (hoid_compare(hoid, backoff->end) < 0)
+			return backoff;		/* begin < hoid < end */
+		node = node->rb_right;
+	}
+
+	return NULL;
+}
+
+/*
+ * Each backoff has a unique id within its OSD session.
+ */
+DEFINE_RB_FUNCS(backoff_by_id, struct ceph_osd_backoff, id, id_node)
+
+static void clear_backoffs(struct ceph_osd *osd)
+{
+	while (!RB_EMPTY_ROOT(&osd->o_backoff_mappings)) {
+		struct ceph_spg_mapping *spg =
+		    rb_entry(rb_first(&osd->o_backoff_mappings),
+			     struct ceph_spg_mapping, node);
+
+		while (!RB_EMPTY_ROOT(&spg->backoffs)) {
+			struct ceph_osd_backoff *backoff =
+			    rb_entry(rb_first(&spg->backoffs),
+				     struct ceph_osd_backoff, spg_node);
+
+			erase_backoff(&spg->backoffs, backoff);
+			erase_backoff_by_id(&osd->o_backoffs_by_id, backoff);
+			free_backoff(backoff);
+		}
+		erase_spg_mapping(&osd->o_backoff_mappings, spg);
+		free_spg_mapping(spg);
+	}
+}
+
+/*
+ * Set up a temporary, non-owning view into @t.
+ */
+static void hoid_fill_from_target(struct ceph_hobject_id *hoid,
+				  const struct ceph_osd_request_target *t)
+{
+	hoid->key = NULL;
+	hoid->key_len = 0;
+	hoid->oid = t->target_oid.name;
+	hoid->oid_len = t->target_oid.name_len;
+	hoid->snapid = CEPH_NOSNAP;
+	hoid->hash = t->pgid.seed;
+	hoid->is_max = false;
+	if (t->target_oloc.pool_ns) {
+		hoid->nspace = t->target_oloc.pool_ns->str;
+		hoid->nspace_len = t->target_oloc.pool_ns->len;
+	} else {
+		hoid->nspace = NULL;
+		hoid->nspace_len = 0;
+	}
+	hoid->pool = t->target_oloc.pool;
+	ceph_hoid_build_hash_cache(hoid);
+}
+
+static bool should_plug_request(struct ceph_osd_request *req)
+{
+	struct ceph_osd *osd = req->r_osd;
+	struct ceph_spg_mapping *spg;
+	struct ceph_osd_backoff *backoff;
+	struct ceph_hobject_id hoid;
+
+	spg = lookup_spg_mapping(&osd->o_backoff_mappings, &req->r_t.spgid);
+	if (!spg)
+		return false;
+
+	hoid_fill_from_target(&hoid, &req->r_t);
+	backoff = lookup_containing_backoff(&spg->backoffs, &hoid);
+	if (!backoff)
+		return false;
+
+	dout("%s req %p tid %llu backoff osd%d spgid %llu.%xs%d id %llu\n",
+	     __func__, req, req->r_tid, osd->o_osd, backoff->spgid.pgid.pool,
+	     backoff->spgid.pgid.seed, backoff->spgid.shard, backoff->id);
+	return true;
+}
+
 static void setup_request_data(struct ceph_osd_request *req,
 			       struct ceph_msg *msg)
 {
@@ -1483,7 +1830,37 @@ static void setup_request_data(struct ceph_osd_request *req,
 	WARN_ON(data_len != msg->data_length);
 }
 
-static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
+static void encode_pgid(void **p, const struct ceph_pg *pgid)
+{
+	ceph_encode_8(p, 1);
+	ceph_encode_64(p, pgid->pool);
+	ceph_encode_32(p, pgid->seed);
+	ceph_encode_32(p, -1); /* preferred */
+}
+
+static void encode_spgid(void **p, const struct ceph_spg *spgid)
+{
+	ceph_start_encoding(p, 1, 1, CEPH_PGID_ENCODING_LEN + 1);
+	encode_pgid(p, &spgid->pgid);
+	ceph_encode_8(p, spgid->shard);
+}
+
+static void encode_oloc(void **p, void *end,
+			const struct ceph_object_locator *oloc)
+{
+	ceph_start_encoding(p, 5, 4, ceph_oloc_encoding_size(oloc));
+	ceph_encode_64(p, oloc->pool);
+	ceph_encode_32(p, -1); /* preferred */
+	ceph_encode_32(p, 0);  /* key len */
+	if (oloc->pool_ns)
+		ceph_encode_string(p, end, oloc->pool_ns->str,
+				   oloc->pool_ns->len);
+	else
+		ceph_encode_32(p, 0);
+}
+
+static void encode_request_partial(struct ceph_osd_request *req,
+				   struct ceph_msg *msg)
 {
 	void *p = msg->front.iov_base;
 	void *const end = p + msg->front_alloc_len;
@@ -1500,38 +1877,27 @@ static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
 
 	setup_request_data(req, msg);
 
-	ceph_encode_32(&p, 1); /* client_inc, always 1 */
+	encode_spgid(&p, &req->r_t.spgid); /* actual spg */
+	ceph_encode_32(&p, req->r_t.pgid.seed); /* raw hash */
 	ceph_encode_32(&p, req->r_osdc->osdmap->epoch);
 	ceph_encode_32(&p, req->r_flags);
+
+	/* reqid */
+	ceph_start_encoding(&p, 2, 2, sizeof(struct ceph_osd_reqid));
+	memset(p, 0, sizeof(struct ceph_osd_reqid));
+	p += sizeof(struct ceph_osd_reqid);
+
+	/* trace */
+	memset(p, 0, sizeof(struct ceph_blkin_trace_info));
+	p += sizeof(struct ceph_blkin_trace_info);
+
+	ceph_encode_32(&p, 0); /* client_inc, always 0 */
 	ceph_encode_timespec(p, &req->r_mtime);
 	p += sizeof(struct ceph_timespec);
 
-	/* reassert_version */
-	memset(p, 0, sizeof(struct ceph_eversion));
-	p += sizeof(struct ceph_eversion);
-
-	/* oloc */
-	ceph_start_encoding(&p, 5, 4,
-			    ceph_oloc_encoding_size(&req->r_t.target_oloc));
-	ceph_encode_64(&p, req->r_t.target_oloc.pool);
-	ceph_encode_32(&p, -1); /* preferred */
-	ceph_encode_32(&p, 0); /* key len */
-	if (req->r_t.target_oloc.pool_ns)
-		ceph_encode_string(&p, end, req->r_t.target_oloc.pool_ns->str,
-				   req->r_t.target_oloc.pool_ns->len);
-	else
-		ceph_encode_32(&p, 0);
-
-	/* pgid */
-	ceph_encode_8(&p, 1);
-	ceph_encode_64(&p, req->r_t.pgid.pool);
-	ceph_encode_32(&p, req->r_t.pgid.seed);
-	ceph_encode_32(&p, -1); /* preferred */
-
-	/* oid */
-	ceph_encode_32(&p, req->r_t.target_oid.name_len);
-	memcpy(p, req->r_t.target_oid.name, req->r_t.target_oid.name_len);
-	p += req->r_t.target_oid.name_len;
+	encode_oloc(&p, end, &req->r_t.target_oloc);
+	ceph_encode_string(&p, end, req->r_t.target_oid.name,
+			   req->r_t.target_oid.name_len);
 
 	/* ops, can imply data */
 	ceph_encode_16(&p, req->r_num_ops);
@@ -1552,11 +1918,10 @@ static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
 	}
 
 	ceph_encode_32(&p, req->r_attempts); /* retry_attempt */
+	BUG_ON(p != end - 8); /* space for features */
 
-	BUG_ON(p > end);
-	msg->front.iov_len = p - msg->front.iov_base;
-	msg->hdr.version = cpu_to_le16(4); /* MOSDOp v4 */
-	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+	msg->hdr.version = cpu_to_le16(8); /* MOSDOp v8 */
+	/* front_len is finalized in encode_request_finish() */
 	msg->hdr.data_len = cpu_to_le32(data_len);
 	/*
 	 * The header "data_off" is a hint to the receiver allowing it
@@ -1565,9 +1930,99 @@ static void encode_request(struct ceph_osd_request *req, struct ceph_msg *msg)
 	 */
 	msg->hdr.data_off = cpu_to_le16(req->r_data_offset);
 
-	dout("%s req %p oid %s oid_len %d front %zu data %u\n", __func__,
-	     req, req->r_t.target_oid.name, req->r_t.target_oid.name_len,
-	     msg->front.iov_len, data_len);
+	dout("%s req %p msg %p oid %s oid_len %d\n", __func__, req, msg,
+	     req->r_t.target_oid.name, req->r_t.target_oid.name_len);
+}
+
+static void encode_request_finish(struct ceph_msg *msg)
+{
+	void *p = msg->front.iov_base;
+	void *const end = p + msg->front_alloc_len;
+
+	if (CEPH_HAVE_FEATURE(msg->con->peer_features, RESEND_ON_SPLIT)) {
+		/* luminous OSD -- encode features and be done */
+		p = end - 8;
+		ceph_encode_64(&p, msg->con->peer_features);
+	} else {
+		struct {
+			char spgid[CEPH_ENCODING_START_BLK_LEN +
+				   CEPH_PGID_ENCODING_LEN + 1];
+			__le32 hash;
+			__le32 epoch;
+			__le32 flags;
+			char reqid[CEPH_ENCODING_START_BLK_LEN +
+				   sizeof(struct ceph_osd_reqid)];
+			char trace[sizeof(struct ceph_blkin_trace_info)];
+			__le32 client_inc;
+			struct ceph_timespec mtime;
+		} __packed head;
+		struct ceph_pg pgid;
+		void *oloc, *oid, *tail;
+		int oloc_len, oid_len, tail_len;
+		int len;
+
+		/*
+		 * Pre-luminous OSD -- reencode v8 into v4 using @head
+		 * as a temporary buffer.  Encode the raw PG; the rest
+		 * is just a matter of moving oloc, oid and tail blobs
+		 * around.
+		 */
+		memcpy(&head, p, sizeof(head));
+		p += sizeof(head);
+
+		oloc = p;
+		p += CEPH_ENCODING_START_BLK_LEN;
+		pgid.pool = ceph_decode_64(&p);
+		p += 4 + 4; /* preferred, key len */
+		len = ceph_decode_32(&p);
+		p += len;   /* nspace */
+		oloc_len = p - oloc;
+
+		oid = p;
+		len = ceph_decode_32(&p);
+		p += len;
+		oid_len = p - oid;
+
+		tail = p;
+		tail_len = (end - p) - 8;
+
+		p = msg->front.iov_base;
+		ceph_encode_copy(&p, &head.client_inc, sizeof(head.client_inc));
+		ceph_encode_copy(&p, &head.epoch, sizeof(head.epoch));
+		ceph_encode_copy(&p, &head.flags, sizeof(head.flags));
+		ceph_encode_copy(&p, &head.mtime, sizeof(head.mtime));
+
+		/* reassert_version */
+		memset(p, 0, sizeof(struct ceph_eversion));
+		p += sizeof(struct ceph_eversion);
+
+		BUG_ON(p >= oloc);
+		memmove(p, oloc, oloc_len);
+		p += oloc_len;
+
+		pgid.seed = le32_to_cpu(head.hash);
+		encode_pgid(&p, &pgid); /* raw pg */
+
+		BUG_ON(p >= oid);
+		memmove(p, oid, oid_len);
+		p += oid_len;
+
+		/* tail -- ops, snapid, snapc, retry_attempt */
+		BUG_ON(p >= tail);
+		memmove(p, tail, tail_len);
+		p += tail_len;
+
+		msg->hdr.version = cpu_to_le16(4); /* MOSDOp v4 */
+	}
+
+	BUG_ON(p > end);
+	msg->front.iov_len = p - msg->front.iov_base;
+	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+
+	dout("%s msg %p tid %llu %u+%u+%u v%d\n", __func__, msg,
+	     le64_to_cpu(msg->hdr.tid), le32_to_cpu(msg->hdr.front_len),
+	     le32_to_cpu(msg->hdr.middle_len), le32_to_cpu(msg->hdr.data_len),
+	     le16_to_cpu(msg->hdr.version));
 }
 
 /*
@@ -1580,6 +2035,10 @@ static void send_request(struct ceph_osd_request *req)
 	verify_osd_locked(osd);
 	WARN_ON(osd->o_osd != req->r_t.osd);
 
+	/* backoff? */
+	if (should_plug_request(req))
+		return;
+
 	/*
 	 * We may have a previously queued request message hanging
 	 * around.  Cancel it to avoid corrupting the msgr.
@@ -1593,11 +2052,13 @@ static void send_request(struct ceph_osd_request *req)
 	else
 		WARN_ON(req->r_flags & CEPH_OSD_FLAG_RETRY);
 
-	encode_request(req, req->r_request);
+	encode_request_partial(req, req->r_request);
 
-	dout("%s req %p tid %llu to pg %llu.%x osd%d flags 0x%x attempt %d\n",
+	dout("%s req %p tid %llu to pgid %llu.%x spgid %llu.%xs%d osd%d e%u flags 0x%x attempt %d\n",
 	     __func__, req, req->r_tid, req->r_t.pgid.pool, req->r_t.pgid.seed,
-	     req->r_t.osd, req->r_flags, req->r_attempts);
+	     req->r_t.spgid.pgid.pool, req->r_t.spgid.pgid.seed,
+	     req->r_t.spgid.shard, osd->o_osd, req->r_t.epoch, req->r_flags,
+	     req->r_attempts);
 
 	req->r_t.paused = false;
 	req->r_stamp = jiffies;
@@ -1645,7 +2106,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
 	dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
 
 again:
-	ct_res = calc_target(osdc, &req->r_t, &req->r_last_force_resend, false);
+	ct_res = calc_target(osdc, &req->r_t, NULL, false);
 	if (ct_res == CALC_TARGET_POOL_DNE && !wrlocked)
 		goto promote;
 
@@ -1737,13 +2198,12 @@ static void submit_request(struct ceph_osd_request *req, bool wrlocked)
 static void finish_request(struct ceph_osd_request *req)
 {
 	struct ceph_osd_client *osdc = req->r_osdc;
-	struct ceph_osd *osd = req->r_osd;
-
-	verify_osd_locked(osd);
-	dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
 
 	WARN_ON(lookup_request_mc(&osdc->map_checks, req->r_tid));
-	unlink_request(osd, req);
+	dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
+
+	if (req->r_osd)
+		unlink_request(req->r_osd, req);
 	atomic_dec(&osdc->num_requests);
 
 	/*
@@ -2441,7 +2901,7 @@ static void linger_submit(struct ceph_osd_linger_request *lreq)
 	struct ceph_osd_client *osdc = lreq->osdc;
 	struct ceph_osd *osd;
 
-	calc_target(osdc, &lreq->t, &lreq->last_force_resend, false);
+	calc_target(osdc, &lreq->t, NULL, false);
 	osd = lookup_create_osd(osdc, lreq->t.osd, true);
 	link_linger(osd, lreq);
 
@@ -3059,7 +3519,7 @@ recalc_linger_target(struct ceph_osd_linger_request *lreq)
 	struct ceph_osd_client *osdc = lreq->osdc;
 	enum calc_target_result ct_res;
 
-	ct_res = calc_target(osdc, &lreq->t, &lreq->last_force_resend, true);
+	ct_res = calc_target(osdc, &lreq->t, NULL, true);
 	if (ct_res == CALC_TARGET_NEED_RESEND) {
 		struct ceph_osd *osd;
 
@@ -3117,6 +3577,7 @@ static void scan_requests(struct ceph_osd *osd,
 				list_add_tail(&lreq->scan_item, need_resend_linger);
 			break;
 		case CALC_TARGET_POOL_DNE:
+			list_del_init(&lreq->scan_item);
 			check_linger_pool_dne(lreq);
 			break;
 		}
@@ -3130,8 +3591,8 @@ static void scan_requests(struct ceph_osd *osd,
 		n = rb_next(n); /* unlink_request(), check_pool_dne() */
 
 		dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
-		ct_res = calc_target(osdc, &req->r_t,
-				     &req->r_last_force_resend, false);
+		ct_res = calc_target(osdc, &req->r_t, &req->r_osd->o_con,
+				     false);
 		switch (ct_res) {
 		case CALC_TARGET_NO_ACTION:
 			force_resend_writes = cleared_full ||
@@ -3229,8 +3690,25 @@ static void kick_requests(struct ceph_osd_client *osdc,
 			  struct list_head *need_resend_linger)
 {
 	struct ceph_osd_linger_request *lreq, *nlreq;
+	enum calc_target_result ct_res;
 	struct rb_node *n;
 
+	/* make sure need_resend targets reflect latest map */
+	for (n = rb_first(need_resend); n; ) {
+		struct ceph_osd_request *req =
+		    rb_entry(n, struct ceph_osd_request, r_node);
+
+		n = rb_next(n);
+
+		if (req->r_t.epoch < osdc->osdmap->epoch) {
+			ct_res = calc_target(osdc, &req->r_t, NULL, false);
+			if (ct_res == CALC_TARGET_POOL_DNE) {
+				erase_request(need_resend, req);
+				check_pool_dne(req);
+			}
+		}
+	}
+
 	for (n = rb_first(need_resend); n; ) {
 		struct ceph_osd_request *req =
 		    rb_entry(n, struct ceph_osd_request, r_node);
@@ -3239,8 +3717,6 @@ static void kick_requests(struct ceph_osd_client *osdc,
 		n = rb_next(n);
 		erase_request(need_resend, req); /* before link_request() */
 
-		WARN_ON(req->r_osd);
-		calc_target(osdc, &req->r_t, NULL, false);
 		osd = lookup_create_osd(osdc, req->r_t.osd, true);
 		link_request(osd, req);
 		if (!req->r_linger) {
@@ -3383,6 +3859,8 @@ static void kick_osd_requests(struct ceph_osd *osd)
 {
 	struct rb_node *n;
 
+	clear_backoffs(osd);
+
 	for (n = rb_first(&osd->o_requests); n; ) {
 		struct ceph_osd_request *req =
 		    rb_entry(n, struct ceph_osd_request, r_node);
@@ -3428,6 +3906,261 @@ static void osd_fault(struct ceph_connection *con)
 	up_write(&osdc->lock);
 }
 
+struct MOSDBackoff {
+	struct ceph_spg spgid;
+	u32 map_epoch;
+	u8 op;
+	u64 id;
+	struct ceph_hobject_id *begin;
+	struct ceph_hobject_id *end;
+};
+
+static int decode_MOSDBackoff(const struct ceph_msg *msg, struct MOSDBackoff *m)
+{
+	void *p = msg->front.iov_base;
+	void *const end = p + msg->front.iov_len;
+	u8 struct_v;
+	u32 struct_len;
+	int ret;
+
+	ret = ceph_start_decoding(&p, end, 1, "spg_t", &struct_v, &struct_len);
+	if (ret)
+		return ret;
+
+	ret = ceph_decode_pgid(&p, end, &m->spgid.pgid);
+	if (ret)
+		return ret;
+
+	ceph_decode_8_safe(&p, end, m->spgid.shard, e_inval);
+	ceph_decode_32_safe(&p, end, m->map_epoch, e_inval);
+	ceph_decode_8_safe(&p, end, m->op, e_inval);
+	ceph_decode_64_safe(&p, end, m->id, e_inval);
+
+	m->begin = kzalloc(sizeof(*m->begin), GFP_NOIO);
+	if (!m->begin)
+		return -ENOMEM;
+
+	ret = decode_hoid(&p, end, m->begin);
+	if (ret) {
+		free_hoid(m->begin);
+		return ret;
+	}
+
+	m->end = kzalloc(sizeof(*m->end), GFP_NOIO);
+	if (!m->end) {
+		free_hoid(m->begin);
+		return -ENOMEM;
+	}
+
+	ret = decode_hoid(&p, end, m->end);
+	if (ret) {
+		free_hoid(m->begin);
+		free_hoid(m->end);
+		return ret;
+	}
+
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+static struct ceph_msg *create_backoff_message(
+				const struct ceph_osd_backoff *backoff,
+				u32 map_epoch)
+{
+	struct ceph_msg *msg;
+	void *p, *end;
+	int msg_size;
+
+	msg_size = CEPH_ENCODING_START_BLK_LEN +
+			CEPH_PGID_ENCODING_LEN + 1; /* spgid */
+	msg_size += 4 + 1 + 8; /* map_epoch, op, id */
+	msg_size += CEPH_ENCODING_START_BLK_LEN +
+			hoid_encoding_size(backoff->begin);
+	msg_size += CEPH_ENCODING_START_BLK_LEN +
+			hoid_encoding_size(backoff->end);
+
+	msg = ceph_msg_new(CEPH_MSG_OSD_BACKOFF, msg_size, GFP_NOIO, true);
+	if (!msg)
+		return NULL;
+
+	p = msg->front.iov_base;
+	end = p + msg->front_alloc_len;
+
+	encode_spgid(&p, &backoff->spgid);
+	ceph_encode_32(&p, map_epoch);
+	ceph_encode_8(&p, CEPH_OSD_BACKOFF_OP_ACK_BLOCK);
+	ceph_encode_64(&p, backoff->id);
+	encode_hoid(&p, end, backoff->begin);
+	encode_hoid(&p, end, backoff->end);
+	BUG_ON(p != end);
+
+	msg->front.iov_len = p - msg->front.iov_base;
+	msg->hdr.version = cpu_to_le16(1); /* MOSDBackoff v1 */
+	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
+
+	return msg;
+}
+
+static void handle_backoff_block(struct ceph_osd *osd, struct MOSDBackoff *m)
+{
+	struct ceph_spg_mapping *spg;
+	struct ceph_osd_backoff *backoff;
+	struct ceph_msg *msg;
+
+	dout("%s osd%d spgid %llu.%xs%d id %llu\n", __func__, osd->o_osd,
+	     m->spgid.pgid.pool, m->spgid.pgid.seed, m->spgid.shard, m->id);
+
+	spg = lookup_spg_mapping(&osd->o_backoff_mappings, &m->spgid);
+	if (!spg) {
+		spg = alloc_spg_mapping();
+		if (!spg) {
+			pr_err("%s failed to allocate spg\n", __func__);
+			return;
+		}
+		spg->spgid = m->spgid; /* struct */
+		insert_spg_mapping(&osd->o_backoff_mappings, spg);
+	}
+
+	backoff = alloc_backoff();
+	if (!backoff) {
+		pr_err("%s failed to allocate backoff\n", __func__);
+		return;
+	}
+	backoff->spgid = m->spgid; /* struct */
+	backoff->id = m->id;
+	backoff->begin = m->begin;
+	m->begin = NULL; /* backoff now owns this */
+	backoff->end = m->end;
+	m->end = NULL;   /* ditto */
+
+	insert_backoff(&spg->backoffs, backoff);
+	insert_backoff_by_id(&osd->o_backoffs_by_id, backoff);
+
+	/*
+	 * Ack with original backoff's epoch so that the OSD can
+	 * discard this if there was a PG split.
+	 */
+	msg = create_backoff_message(backoff, m->map_epoch);
+	if (!msg) {
+		pr_err("%s failed to allocate msg\n", __func__);
+		return;
+	}
+	ceph_con_send(&osd->o_con, msg);
+}
+
+static bool target_contained_by(const struct ceph_osd_request_target *t,
+				const struct ceph_hobject_id *begin,
+				const struct ceph_hobject_id *end)
+{
+	struct ceph_hobject_id hoid;
+	int cmp;
+
+	hoid_fill_from_target(&hoid, t);
+	cmp = hoid_compare(&hoid, begin);
+	return !cmp || (cmp > 0 && hoid_compare(&hoid, end) < 0);
+}
+
+static void handle_backoff_unblock(struct ceph_osd *osd,
+				   const struct MOSDBackoff *m)
+{
+	struct ceph_spg_mapping *spg;
+	struct ceph_osd_backoff *backoff;
+	struct rb_node *n;
+
+	dout("%s osd%d spgid %llu.%xs%d id %llu\n", __func__, osd->o_osd,
+	     m->spgid.pgid.pool, m->spgid.pgid.seed, m->spgid.shard, m->id);
+
+	backoff = lookup_backoff_by_id(&osd->o_backoffs_by_id, m->id);
+	if (!backoff) {
+		pr_err("%s osd%d spgid %llu.%xs%d id %llu backoff dne\n",
+		       __func__, osd->o_osd, m->spgid.pgid.pool,
+		       m->spgid.pgid.seed, m->spgid.shard, m->id);
+		return;
+	}
+
+	if (hoid_compare(backoff->begin, m->begin) &&
+	    hoid_compare(backoff->end, m->end)) {
+		pr_err("%s osd%d spgid %llu.%xs%d id %llu bad range?\n",
+		       __func__, osd->o_osd, m->spgid.pgid.pool,
+		       m->spgid.pgid.seed, m->spgid.shard, m->id);
+		/* unblock it anyway... */
+	}
+
+	spg = lookup_spg_mapping(&osd->o_backoff_mappings, &backoff->spgid);
+	BUG_ON(!spg);
+
+	erase_backoff(&spg->backoffs, backoff);
+	erase_backoff_by_id(&osd->o_backoffs_by_id, backoff);
+	free_backoff(backoff);
+
+	if (RB_EMPTY_ROOT(&spg->backoffs)) {
+		erase_spg_mapping(&osd->o_backoff_mappings, spg);
+		free_spg_mapping(spg);
+	}
+
+	for (n = rb_first(&osd->o_requests); n; n = rb_next(n)) {
+		struct ceph_osd_request *req =
+		    rb_entry(n, struct ceph_osd_request, r_node);
+
+		if (!ceph_spg_compare(&req->r_t.spgid, &m->spgid)) {
+			/*
+			 * Match against @m, not @backoff -- the PG may
+			 * have split on the OSD.
+			 */
+			if (target_contained_by(&req->r_t, m->begin, m->end)) {
+				/*
+				 * If no other installed backoff applies,
+				 * resend.
+				 */
+				send_request(req);
+			}
+		}
+	}
+}
+
+static void handle_backoff(struct ceph_osd *osd, struct ceph_msg *msg)
+{
+	struct ceph_osd_client *osdc = osd->o_osdc;
+	struct MOSDBackoff m;
+	int ret;
+
+	down_read(&osdc->lock);
+	if (!osd_registered(osd)) {
+		dout("%s osd%d unknown\n", __func__, osd->o_osd);
+		up_read(&osdc->lock);
+		return;
+	}
+	WARN_ON(osd->o_osd != le64_to_cpu(msg->hdr.src.num));
+
+	mutex_lock(&osd->lock);
+	ret = decode_MOSDBackoff(msg, &m);
+	if (ret) {
+		pr_err("failed to decode MOSDBackoff: %d\n", ret);
+		ceph_msg_dump(msg);
+		goto out_unlock;
+	}
+
+	switch (m.op) {
+	case CEPH_OSD_BACKOFF_OP_BLOCK:
+		handle_backoff_block(osd, &m);
+		break;
+	case CEPH_OSD_BACKOFF_OP_UNBLOCK:
+		handle_backoff_unblock(osd, &m);
+		break;
+	default:
+		pr_err("%s osd%d unknown op %d\n", __func__, osd->o_osd, m.op);
+	}
+
+	free_hoid(m.begin);
+	free_hoid(m.end);
+
+out_unlock:
+	mutex_unlock(&osd->lock);
+	up_read(&osdc->lock);
+}
+
 /*
  * Process osd watch notifications
  */
@@ -4365,6 +5098,9 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
 	case CEPH_MSG_OSD_OPREPLY:
 		handle_reply(osd, msg);
 		break;
+	case CEPH_MSG_OSD_BACKOFF:
+		handle_backoff(osd, msg);
+		break;
 	case CEPH_MSG_WATCH_NOTIFY:
 		handle_watch_notify(osdc, msg);
 		break;
@@ -4487,6 +5223,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
 	*skip = 0;
 	switch (type) {
 	case CEPH_MSG_OSD_MAP:
+	case CEPH_MSG_OSD_BACKOFF:
 	case CEPH_MSG_WATCH_NOTIFY:
 		return alloc_msg_with_page_vector(hdr);
 	case CEPH_MSG_OSD_OPREPLY:
@@ -4571,6 +5308,11 @@ static int invalidate_authorizer(struct ceph_connection *con)
 	return ceph_monc_validate_auth(&osdc->client->monc);
 }
 
+static void osd_reencode_message(struct ceph_msg *msg)
+{
+	encode_request_finish(msg);
+}
+
 static int osd_sign_message(struct ceph_msg *msg)
 {
 	struct ceph_osd *o = msg->con->private;
@@ -4595,6 +5337,7 @@ static const struct ceph_connection_operations osd_con_ops = {
 	.verify_authorizer_reply = verify_authorizer_reply,
 	.invalidate_authorizer = invalidate_authorizer,
 	.alloc_msg = alloc_msg,
+	.reencode_message = osd_reencode_message,
 	.sign_message = osd_sign_message,
 	.check_message_signature = osd_check_message_signature,
 	.fault = osd_fault,

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 55e3a47..864789c 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c

@@ -11,7 +11,7 @@
 #include <linux/crush/hash.h>
 #include <linux/crush/mapper.h>
 
-char *ceph_osdmap_state_str(char *str, int len, int state)
+char *ceph_osdmap_state_str(char *str, int len, u32 state)
 {
 	if (!len)
 		return str;
@@ -138,19 +138,175 @@ static int crush_decode_straw2_bucket(void **p, void *end,
 	return -EINVAL;
 }
 
-static int skip_name_map(void **p, void *end)
+static struct crush_choose_arg_map *alloc_choose_arg_map(void)
 {
-        int len;
-        ceph_decode_32_safe(p, end, len ,bad);
-        while (len--) {
-                int strlen;
-                *p += sizeof(u32);
-                ceph_decode_32_safe(p, end, strlen, bad);
-                *p += strlen;
+	struct crush_choose_arg_map *arg_map;
+
+	arg_map = kzalloc(sizeof(*arg_map), GFP_NOIO);
+	if (!arg_map)
+		return NULL;
+
+	RB_CLEAR_NODE(&arg_map->node);
+	return arg_map;
 }
-        return 0;
-bad:
-        return -EINVAL;
+
+static void free_choose_arg_map(struct crush_choose_arg_map *arg_map)
+{
+	if (arg_map) {
+		int i, j;
+
+		WARN_ON(!RB_EMPTY_NODE(&arg_map->node));
+
+		for (i = 0; i < arg_map->size; i++) {
+			struct crush_choose_arg *arg = &arg_map->args[i];
+
+			for (j = 0; j < arg->weight_set_size; j++)
+				kfree(arg->weight_set[j].weights);
+			kfree(arg->weight_set);
+			kfree(arg->ids);
+		}
+		kfree(arg_map->args);
+		kfree(arg_map);
+	}
+}
+
+DEFINE_RB_FUNCS(choose_arg_map, struct crush_choose_arg_map, choose_args_index,
+		node);
+
+void clear_choose_args(struct crush_map *c)
+{
+	while (!RB_EMPTY_ROOT(&c->choose_args)) {
+		struct crush_choose_arg_map *arg_map =
+		    rb_entry(rb_first(&c->choose_args),
+			     struct crush_choose_arg_map, node);
+
+		erase_choose_arg_map(&c->choose_args, arg_map);
+		free_choose_arg_map(arg_map);
+	}
+}
+
+/*
+ * Decode a u32 element count followed by that many u32 values.
+ * Returns a newly allocated array (NULL when the count is 0) and stores
+ * the count in @plen, or an ERR_PTR() on short input or allocation failure.
+ */
+static u32 *decode_array_32_alloc(void **p, void *end, u32 *plen)
+{
+	u32 *a = NULL;
+	u32 len;
+
+	ceph_decode_32_safe(p, end, len, e_inval);
+	if (len) {
+		u32 i;
+
+		/* make sure the payload is there before allocating for it */
+		ceph_decode_need(p, end, len * sizeof(u32), e_inval);
+		a = kmalloc_array(len, sizeof(u32), GFP_NOIO);
+		if (!a)
+			return ERR_PTR(-ENOMEM);
+
+		for (i = 0; i < len; i++)
+			a[i] = ceph_decode_32(p);
+	}
+
+	*plen = len;
+	return a;
+
+e_inval:
+	return ERR_PTR(-EINVAL);
+}
+
+/* Assumes @arg is zero-initialized; stays safe to free if decoding fails. */
+static int decode_choose_arg(void **p, void *end, struct crush_choose_arg *arg)
+{
+	u32 i;
+	int ret;
+
+	ceph_decode_32_safe(p, end, arg->weight_set_size, e_inval);
+	if (arg->weight_set_size) {
+		/* zeroed: a partial decode must not leave wild ->weights */
+		arg->weight_set = kcalloc(arg->weight_set_size,
+					  sizeof(*arg->weight_set), GFP_NOIO);
+		if (!arg->weight_set) {
+			/* free_choose_arg_map() would walk a NULL array */
+			arg->weight_set_size = 0;
+			return -ENOMEM;
+		}
+
+		for (i = 0; i < arg->weight_set_size; i++) {
+			struct crush_weight_set *w = &arg->weight_set[i];
+
+			w->weights = decode_array_32_alloc(p, end, &w->size);
+			if (IS_ERR(w->weights)) {
+				ret = PTR_ERR(w->weights);
+				w->weights = NULL;
+				return ret;
+			}
+		}
+	}
+
+	arg->ids = decode_array_32_alloc(p, end, &arg->ids_size);
+	if (IS_ERR(arg->ids)) {
+		ret = PTR_ERR(arg->ids);
+		arg->ids = NULL;
+		return ret;
+	}
+
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+static int decode_choose_args(void **p, void *end, struct crush_map *c)
+{
+	struct crush_choose_arg_map *arg_map = NULL;
+	u32 num_choose_arg_maps, num_buckets;
+	int ret;
+
+	ceph_decode_32_safe(p, end, num_choose_arg_maps, e_inval);
+	while (num_choose_arg_maps--) {
+		arg_map = alloc_choose_arg_map();
+		if (!arg_map) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		ceph_decode_64_safe(p, end, arg_map->choose_args_index,
+				    e_inval);
+		arg_map->size = c->max_buckets;
+		arg_map->args = kcalloc(arg_map->size, sizeof(*arg_map->args),
+					GFP_NOIO);
+		if (!arg_map->args) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		ceph_decode_32_safe(p, end, num_buckets, e_inval);
+		while (num_buckets--) {
+			struct crush_choose_arg *arg;
+			u32 bucket_index;
+
+			ceph_decode_32_safe(p, end, bucket_index, e_inval);
+			if (bucket_index >= arg_map->size)
+				goto e_inval;
+
+			arg = &arg_map->args[bucket_index];
+			ret = decode_choose_arg(p, end, arg);
+			if (ret)
+				goto fail;
+		}
+
+		insert_choose_arg_map(&c->choose_args, arg_map);
+	}
+
+	return 0;
+
+e_inval:
+	ret = -EINVAL;
+fail:
+	free_choose_arg_map(arg_map);
+	return ret;
 }
 
 static void crush_finalize(struct crush_map *c)
@@ -187,7 +343,6 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 	void **p = &pbyval;
 	void *start = pbyval;
 	u32 magic;
-	u32 num_name_maps;
 
 	dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));
 
@@ -195,6 +350,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 	if (c == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	c->choose_args = RB_ROOT;
+
         /* set tunables to default values */
         c->choose_local_tries = 2;
         c->choose_local_fallback_tries = 5;
@@ -353,12 +510,9 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 		}
 	}
 
-	/* ignore trailing name maps. */
-        for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) {
-                err = skip_name_map(p, end);
-                if (err < 0)
-                        goto done;
-        }
+	ceph_decode_skip_map(p, end, 32, string, bad); /* type_map */
+	ceph_decode_skip_map(p, end, 32, string, bad); /* name_map */
+	ceph_decode_skip_map(p, end, 32, string, bad); /* rule_name_map */
 
         /* tunables */
         ceph_decode_need(p, end, 3*sizeof(u32), done);
@@ -391,6 +545,21 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 	dout("crush decode tunable chooseleaf_stable = %d\n",
 	     c->chooseleaf_stable);
 
+	if (*p != end) {
+		/* class_map */
+		ceph_decode_skip_map(p, end, 32, 32, bad);
+		/* class_name */
+		ceph_decode_skip_map(p, end, 32, string, bad);
+		/* class_bucket */
+		ceph_decode_skip_map_of_map(p, end, 32, 32, 32, bad);
+	}
+
+	if (*p != end) {
+		err = decode_choose_args(p, end, c);
+		if (err)
+			goto bad;
+	}
+
 done:
 	crush_finalize(c);
 	dout("crush_decode success\n");
@@ -418,73 +587,47 @@ int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs)
 	return 0;
 }
 
+/*
+ * Three-way comparison of two spgids: order by pgid first (as defined by
+ * ceph_pg_compare()), then by shard.  Returns <0, 0 or >0.
+ */
+int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs)
+{
+	int pg_order = ceph_pg_compare(&lhs->pgid, &rhs->pgid);
+
+	if (pg_order)
+		return pg_order;
+
+	if (lhs->shard == rhs->shard)
+		return 0;
+
+	return lhs->shard < rhs->shard ? -1 : 1;
+}
+
+/*
+ * Allocate a ceph_pg_mapping with @payload_len extra bytes after the
+ * struct and mark its rb node as not linked into any tree.  The rest of
+ * the mapping is not initialized.  Returns NULL on allocation failure.
+ */
+static struct ceph_pg_mapping *alloc_pg_mapping(size_t payload_len)
+{
+	struct ceph_pg_mapping *mapping;
+
+	mapping = kmalloc(sizeof(*mapping) + payload_len, GFP_NOIO);
+	if (mapping)
+		RB_CLEAR_NODE(&mapping->node);
+
+	return mapping;
+}
+
+/*
+ * Free a mapping obtained from alloc_pg_mapping().  Warns if the rb node
+ * is not in the cleared state, i.e. the mapping looks like it is still
+ * linked into a tree.
+ */
+static void free_pg_mapping(struct ceph_pg_mapping *pg)
+{
+	WARN_ON(!RB_EMPTY_NODE(&pg->node));
+
+	kfree(pg);
+}
+
 /*
  * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
  * to a set of osds) and primary_temp (explicit primary setting)
  */
-static int __insert_pg_mapping(struct ceph_pg_mapping *new,
-			       struct rb_root *root)
-{
-	struct rb_node **p = &root->rb_node;
-	struct rb_node *parent = NULL;
-	struct ceph_pg_mapping *pg = NULL;
-	int c;
-
-	dout("__insert_pg_mapping %llx %p\n", *(u64 *)&new->pgid, new);
-	while (*p) {
-		parent = *p;
-		pg = rb_entry(parent, struct ceph_pg_mapping, node);
-		c = ceph_pg_compare(&new->pgid, &pg->pgid);
-		if (c < 0)
-			p = &(*p)->rb_left;
-		else if (c > 0)
-			p = &(*p)->rb_right;
-		else
-			return -EEXIST;
-	}
-
-	rb_link_node(&new->node, parent, p);
-	rb_insert_color(&new->node, root);
-	return 0;
-}
-
-static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
-						   struct ceph_pg pgid)
-{
-	struct rb_node *n = root->rb_node;
-	struct ceph_pg_mapping *pg;
-	int c;
-
-	while (n) {
-		pg = rb_entry(n, struct ceph_pg_mapping, node);
-		c = ceph_pg_compare(&pgid, &pg->pgid);
-		if (c < 0) {
-			n = n->rb_left;
-		} else if (c > 0) {
-			n = n->rb_right;
-		} else {
-			dout("__lookup_pg_mapping %lld.%x got %p\n",
-			     pgid.pool, pgid.seed, pg);
-			return pg;
-		}
-	}
-	return NULL;
-}
-
-static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
-{
-	struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);
-
-	if (pg) {
-		dout("__remove_pg_mapping %lld.%x %p\n", pgid.pool, pgid.seed,
-		     pg);
-		rb_erase(&pg->node, root);
-		kfree(pg);
-		return 0;
-	}
-	dout("__remove_pg_mapping %lld.%x dne\n", pgid.pool, pgid.seed);
-	return -ENOENT;
-}
+DEFINE_RB_FUNCS2(pg_mapping, struct ceph_pg_mapping, pgid, ceph_pg_compare,
+		 RB_BYPTR, const struct ceph_pg *, node)
 
 /*
  * rbtree of pg pool info
@@ -682,11 +825,48 @@ static int decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
 		*p += len;
 	}
 
+	/*
+	 * last_force_op_resend_preluminous, will be overridden if the
+	 * map was encoded with RESEND_ON_SPLIT
+	 */
 	if (ev >= 15)
 		pi->last_force_request_resend = ceph_decode_32(p);
 	else
 		pi->last_force_request_resend = 0;
 
+	if (ev >= 16)
+		*p += 4; /* skip min_read_recency_for_promote */
+
+	if (ev >= 17)
+		*p += 8; /* skip expected_num_objects */
+
+	if (ev >= 19)
+		*p += 4; /* skip cache_target_dirty_high_ratio_micro */
+
+	if (ev >= 20)
+		*p += 4; /* skip min_write_recency_for_promote */
+
+	if (ev >= 21)
+		*p += 1; /* skip use_gmt_hitset */
+
+	if (ev >= 22)
+		*p += 1; /* skip fast_read */
+
+	if (ev >= 23) {
+		*p += 4; /* skip hit_set_grade_decay_rate */
+		*p += 4; /* skip hit_set_search_last_n */
+	}
+
+	if (ev >= 24) {
+		/* skip opts */
+		*p += 1 + 1; /* versions */
+		len = ceph_decode_32(p);
+		*p += len;
+	}
+
+	if (ev >= 25)
+		pi->last_force_request_resend = ceph_decode_32(p);
+
 	/* ignore the rest */
 
 	*p = pool_end;
@@ -743,6 +923,8 @@ struct ceph_osdmap *ceph_osdmap_alloc(void)
 	map->pool_max = -1;
 	map->pg_temp = RB_ROOT;
 	map->primary_temp = RB_ROOT;
+	map->pg_upmap = RB_ROOT;
+	map->pg_upmap_items = RB_ROOT;
 	mutex_init(&map->crush_workspace_mutex);
 
 	return map;
@@ -757,14 +939,28 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
 		struct ceph_pg_mapping *pg =
 			rb_entry(rb_first(&map->pg_temp),
 				 struct ceph_pg_mapping, node);
-		rb_erase(&pg->node, &map->pg_temp);
-		kfree(pg);
+		erase_pg_mapping(&map->pg_temp, pg);
+		free_pg_mapping(pg);
 	}
 	while (!RB_EMPTY_ROOT(&map->primary_temp)) {
 		struct ceph_pg_mapping *pg =
 			rb_entry(rb_first(&map->primary_temp),
 				 struct ceph_pg_mapping, node);
-		rb_erase(&pg->node, &map->primary_temp);
+		erase_pg_mapping(&map->primary_temp, pg);
+		free_pg_mapping(pg);
+	}
+	while (!RB_EMPTY_ROOT(&map->pg_upmap)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(rb_first(&map->pg_upmap),
+				 struct ceph_pg_mapping, node);
+		rb_erase(&pg->node, &map->pg_upmap);
+		kfree(pg);
+	}
+	while (!RB_EMPTY_ROOT(&map->pg_upmap_items)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(rb_first(&map->pg_upmap_items),
+				 struct ceph_pg_mapping, node);
+		rb_erase(&pg->node, &map->pg_upmap_items);
 		kfree(pg);
 	}
 	while (!RB_EMPTY_ROOT(&map->pg_pools)) {
@@ -788,7 +984,7 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
  */
 static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
 {
-	u8 *state;
+	u32 *state;
 	u32 *weight;
 	struct ceph_entity_addr *addr;
 	int i;
@@ -964,47 +1160,40 @@ static int decode_new_pools(void **p, void *end, struct ceph_osdmap *map)
 	return __decode_pools(p, end, map, true);
 }
 
-static int __decode_pg_temp(void **p, void *end, struct ceph_osdmap *map,
-			    bool incremental)
+typedef struct ceph_pg_mapping *(*decode_mapping_fn_t)(void **, void *, bool);
+
+static int decode_pg_mapping(void **p, void *end, struct rb_root *mapping_root,
+			     decode_mapping_fn_t fn, bool incremental)
 {
 	u32 n;
 
+	WARN_ON(!incremental && !fn);
+
 	ceph_decode_32_safe(p, end, n, e_inval);
 	while (n--) {
+		struct ceph_pg_mapping *pg;
 		struct ceph_pg pgid;
-		u32 len, i;
 		int ret;
 
 		ret = ceph_decode_pgid(p, end, &pgid);
 		if (ret)
 			return ret;
 
-		ceph_decode_32_safe(p, end, len, e_inval);
+		pg = lookup_pg_mapping(mapping_root, &pgid);
+		if (pg) {
+			WARN_ON(!incremental);
+			erase_pg_mapping(mapping_root, pg);
+			free_pg_mapping(pg);
+		}
 
-		ret = __remove_pg_mapping(&map->pg_temp, pgid);
-		BUG_ON(!incremental && ret != -ENOENT);
+		if (fn) {
+			pg = fn(p, end, incremental);
+			if (IS_ERR(pg))
+				return PTR_ERR(pg);
 
-		if (!incremental || len > 0) {
-			struct ceph_pg_mapping *pg;
-
-			ceph_decode_need(p, end, len*sizeof(u32), e_inval);
-
-			if (len > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
-				return -EINVAL;
-
-			pg = kzalloc(sizeof(*pg) + len*sizeof(u32), GFP_NOFS);
-			if (!pg)
-				return -ENOMEM;
-
-			pg->pgid = pgid;
-			pg->pg_temp.len = len;
-			for (i = 0; i < len; i++)
-				pg->pg_temp.osds[i] = ceph_decode_32(p);
-
-			ret = __insert_pg_mapping(pg, &map->pg_temp);
-			if (ret) {
-				kfree(pg);
-				return ret;
+			if (pg) {
+				pg->pgid = pgid; /* struct */
+				insert_pg_mapping(mapping_root, pg);
 			}
 		}
 	}
@@ -1015,69 +1204,77 @@ static int __decode_pg_temp(void **p, void *end, struct ceph_osdmap *map,
 	return -EINVAL;
 }
 
+/*
+ * Decode the value part of one pg_temp entry: a 32-bit OSD count
+ * followed by that many 32-bit OSD ids.
+ *
+ * Returns the new mapping (->pgid is not set here), NULL when
+ * @incremental is set and the list is empty (a removal), or
+ * ERR_PTR(-EINVAL) / ERR_PTR(-ENOMEM) on failure.
+ */
+static struct ceph_pg_mapping *__decode_pg_temp(void **p, void *end,
+						bool incremental)
+{
+	struct ceph_pg_mapping *mapping;
+	u32 num_osds, k;
+
+	ceph_decode_32_safe(p, end, num_osds, e_inval);
+	if (incremental && !num_osds)
+		return NULL;	/* new_pg_temp: [] to remove */
+
+	/* guard the allocation size computed below against overflow */
+	if (num_osds > (SIZE_MAX - sizeof(*mapping)) / sizeof(u32))
+		goto e_inval;
+
+	ceph_decode_need(p, end, num_osds * sizeof(u32), e_inval);
+	mapping = alloc_pg_mapping(num_osds * sizeof(u32));
+	if (!mapping)
+		return ERR_PTR(-ENOMEM);
+
+	mapping->pg_temp.len = num_osds;
+	for (k = 0; k < num_osds; k++)
+		mapping->pg_temp.osds[k] = ceph_decode_32(p);
+
+	return mapping;
+
+e_inval:
+	return ERR_PTR(-EINVAL);
+}
+
 static int decode_pg_temp(void **p, void *end, struct ceph_osdmap *map)
 {
-	return __decode_pg_temp(p, end, map, false);
+	return decode_pg_mapping(p, end, &map->pg_temp, __decode_pg_temp,
+				 false);
 }
 
 static int decode_new_pg_temp(void **p, void *end, struct ceph_osdmap *map)
 {
-	return __decode_pg_temp(p, end, map, true);
+	return decode_pg_mapping(p, end, &map->pg_temp, __decode_pg_temp,
+				 true);
 }
 
-static int __decode_primary_temp(void **p, void *end, struct ceph_osdmap *map,
-				 bool incremental)
+static struct ceph_pg_mapping *__decode_primary_temp(void **p, void *end,
+						     bool incremental)
 {
-	u32 n;
+	struct ceph_pg_mapping *pg;
+	u32 osd;
 
-	ceph_decode_32_safe(p, end, n, e_inval);
-	while (n--) {
-		struct ceph_pg pgid;
-		u32 osd;
-		int ret;
+	ceph_decode_32_safe(p, end, osd, e_inval);
+	if (osd == (u32)-1 && incremental)
+		return NULL;	/* new_primary_temp: -1 to remove */
 
-		ret = ceph_decode_pgid(p, end, &pgid);
-		if (ret)
-			return ret;
+	pg = alloc_pg_mapping(0);
+	if (!pg)
+		return ERR_PTR(-ENOMEM);
 
-		ceph_decode_32_safe(p, end, osd, e_inval);
-
-		ret = __remove_pg_mapping(&map->primary_temp, pgid);
-		BUG_ON(!incremental && ret != -ENOENT);
-
-		if (!incremental || osd != (u32)-1) {
-			struct ceph_pg_mapping *pg;
-
-			pg = kzalloc(sizeof(*pg), GFP_NOFS);
-			if (!pg)
-				return -ENOMEM;
-
-			pg->pgid = pgid;
-			pg->primary_temp.osd = osd;
-
-			ret = __insert_pg_mapping(pg, &map->primary_temp);
-			if (ret) {
-				kfree(pg);
-				return ret;
-			}
-		}
-	}
-
-	return 0;
+	pg->primary_temp.osd = osd;
+	return pg;
 
 e_inval:
-	return -EINVAL;
+	return ERR_PTR(-EINVAL);
 }
 
 static int decode_primary_temp(void **p, void *end, struct ceph_osdmap *map)
 {
-	return __decode_primary_temp(p, end, map, false);
+	return decode_pg_mapping(p, end, &map->primary_temp,
+				 __decode_primary_temp, false);
 }
 
 static int decode_new_primary_temp(void **p, void *end,
 				   struct ceph_osdmap *map)
 {
-	return __decode_primary_temp(p, end, map, true);
+	return decode_pg_mapping(p, end, &map->primary_temp,
+				 __decode_primary_temp, true);
 }
 
 u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd)
@@ -1168,6 +1365,75 @@ static int decode_new_primary_affinity(void **p, void *end,
 	return -EINVAL;
 }
 
+/*
+ * Decode the value part of one pg_upmap entry.  The encoding is read by
+ * __decode_pg_temp(); "incremental" is always passed as false so that an
+ * empty list is never turned into a NULL (removal) result.  The
+ * @__unused argument only exists to match decode_mapping_fn_t.
+ */
+static struct ceph_pg_mapping *__decode_pg_upmap(void **p, void *end,
+						 bool __unused)
+{
+	return __decode_pg_temp(p, end, false);
+}
+
+/* Decode the pg_upmap section of a full map into map->pg_upmap. */
+static int decode_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
+{
+	struct rb_root *root = &map->pg_upmap;
+
+	return decode_pg_mapping(p, end, root, __decode_pg_upmap, false);
+}
+
+/* Incremental: add or replace the listed entries in map->pg_upmap. */
+static int decode_new_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
+{
+	struct rb_root *root = &map->pg_upmap;
+
+	return decode_pg_mapping(p, end, root, __decode_pg_upmap, true);
+}
+
+/*
+ * Incremental: drop the listed pgids from map->pg_upmap.  No decode
+ * callback is passed, so entries are only erased, never inserted.
+ */
+static int decode_old_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
+{
+	struct rb_root *root = &map->pg_upmap;
+
+	return decode_pg_mapping(p, end, root, NULL, true);
+}
+
+/*
+ * Decode the value part of one pg_upmap_items entry: a 32-bit pair count
+ * followed by that many (from, to) pairs of 32-bit OSD ids.
+ *
+ * @__unused is ignored; it only exists to match decode_mapping_fn_t.
+ *
+ * Returns the new mapping (->pgid is left for the caller to fill in),
+ * ERR_PTR(-EINVAL) on a short buffer or an oversized count, or
+ * ERR_PTR(-ENOMEM) on allocation failure.
+ */
+static struct ceph_pg_mapping *__decode_pg_upmap_items(void **p, void *end,
+						       bool __unused)
+{
+	struct ceph_pg_mapping *pg;
+	u32 len, i;
+
+	ceph_decode_32_safe(p, end, len, e_inval);
+	/* guard the allocation size computed below against overflow */
+	if (len > (SIZE_MAX - sizeof(*pg)) / (2 * sizeof(u32)))
+		return ERR_PTR(-EINVAL);
+
+	ceph_decode_need(p, end, 2 * len * sizeof(u32), e_inval);
+	/*
+	 * Must go through alloc_pg_mapping() so that pg->node is
+	 * RB_CLEAR_NODE'd: a merely zeroed rb_node does not satisfy
+	 * RB_EMPTY_NODE(), which the pg_mapping insert/free helpers
+	 * check.  Every field used afterwards is assigned below or by
+	 * the caller, so zeroing the allocation is not needed.
+	 */
+	pg = alloc_pg_mapping(2 * len * sizeof(u32));
+	if (!pg)
+		return ERR_PTR(-ENOMEM);
+
+	pg->pg_upmap_items.len = len;
+	for (i = 0; i < len; i++) {
+		pg->pg_upmap_items.from_to[i][0] = ceph_decode_32(p);
+		pg->pg_upmap_items.from_to[i][1] = ceph_decode_32(p);
+	}
+
+	return pg;
+
+e_inval:
+	return ERR_PTR(-EINVAL);
+}
+
+/* Decode the pg_upmap_items section of a full map. */
+static int decode_pg_upmap_items(void **p, void *end, struct ceph_osdmap *map)
+{
+	struct rb_root *root = &map->pg_upmap_items;
+
+	return decode_pg_mapping(p, end, root, __decode_pg_upmap_items,
+				 false);
+}
+
+/* Incremental: add or replace the listed entries in map->pg_upmap_items. */
+static int decode_new_pg_upmap_items(void **p, void *end,
+				     struct ceph_osdmap *map)
+{
+	struct rb_root *root = &map->pg_upmap_items;
+
+	return decode_pg_mapping(p, end, root, __decode_pg_upmap_items,
+				 true);
+}
+
+/*
+ * Incremental: drop the listed pgids from map->pg_upmap_items.  No
+ * decode callback is passed, so entries are only erased.
+ */
+static int decode_old_pg_upmap_items(void **p, void *end,
+				     struct ceph_osdmap *map)
+{
+	struct rb_root *root = &map->pg_upmap_items;
+
+	return decode_pg_mapping(p, end, root, NULL, true);
+}
+
 /*
  * decode a full map.
  */
@@ -1218,13 +1484,21 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
 
 	/* osd_state, osd_weight, osd_addrs->client_addr */
 	ceph_decode_need(p, end, 3*sizeof(u32) +
-			 map->max_osd*(1 + sizeof(*map->osd_weight) +
+			 map->max_osd*((struct_v >= 5 ? sizeof(u32) :
+							sizeof(u8)) +
+				       sizeof(*map->osd_weight) +
 				       sizeof(*map->osd_addr)), e_inval);
 
 	if (ceph_decode_32(p) != map->max_osd)
 		goto e_inval;
 
-	ceph_decode_copy(p, map->osd_state, map->max_osd);
+	if (struct_v >= 5) {
+		for (i = 0; i < map->max_osd; i++)
+			map->osd_state[i] = ceph_decode_32(p);
+	} else {
+		for (i = 0; i < map->max_osd; i++)
+			map->osd_state[i] = ceph_decode_8(p);
+	}
 
 	if (ceph_decode_32(p) != map->max_osd)
 		goto e_inval;
@@ -1257,9 +1531,7 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
 		if (err)
 			goto bad;
 	} else {
-		/* XXX can this happen? */
-		kfree(map->osd_primary_affinity);
-		map->osd_primary_affinity = NULL;
+		WARN_ON(map->osd_primary_affinity);
 	}
 
 	/* crush */
@@ -1268,6 +1540,26 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
 	if (err)
 		goto bad;
 
+	*p += len;
+	if (struct_v >= 3) {
+		/* erasure_code_profiles */
+		ceph_decode_skip_map_of_map(p, end, string, string, string,
+					    bad);
+	}
+
+	if (struct_v >= 4) {
+		err = decode_pg_upmap(p, end, map);
+		if (err)
+			goto bad;
+
+		err = decode_pg_upmap_items(p, end, map);
+		if (err)
+			goto bad;
+	} else {
+		WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap));
+		WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap_items));
+	}
+
 	/* ignore the rest */
 	*p = end;
 
@@ -1314,7 +1606,7 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
  *     new_up_client: { osd=6, addr=... } # set osd_state and addr
  *     new_state: { osd=6, xorstate=EXISTS } # clear osd_state
  */
-static int decode_new_up_state_weight(void **p, void *end,
+static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
 				      struct ceph_osdmap *map)
 {
 	void *new_up_client;
@@ -1330,7 +1622,7 @@ static int decode_new_up_state_weight(void **p, void *end,
 
 	new_state = *p;
 	ceph_decode_32_safe(p, end, len, e_inval);
-	len *= sizeof(u32) + sizeof(u8);
+	len *= sizeof(u32) + (struct_v >= 5 ? sizeof(u32) : sizeof(u8));
 	ceph_decode_need(p, end, len, e_inval);
 	*p += len;
 
@@ -1366,11 +1658,14 @@ static int decode_new_up_state_weight(void **p, void *end,
 	len = ceph_decode_32(p);
 	while (len--) {
 		s32 osd;
-		u8 xorstate;
+		u32 xorstate;
 		int ret;
 
 		osd = ceph_decode_32(p);
-		xorstate = ceph_decode_8(p);
+		if (struct_v >= 5)
+			xorstate = ceph_decode_32(p);
+		else
+			xorstate = ceph_decode_8(p);
 		if (xorstate == 0)
 			xorstate = CEPH_OSD_UP;
 		BUG_ON(osd >= map->max_osd);
@@ -1504,7 +1799,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	}
 
 	/* new_up_client, new_state, new_weight */
-	err = decode_new_up_state_weight(p, end, map);
+	err = decode_new_up_state_weight(p, end, struct_v, map);
 	if (err)
 		goto bad;
 
@@ -1527,6 +1822,32 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 			goto bad;
 	}
 
+	if (struct_v >= 3) {
+		/* new_erasure_code_profiles */
+		ceph_decode_skip_map_of_map(p, end, string, string, string,
+					    bad);
+		/* old_erasure_code_profiles */
+		ceph_decode_skip_set(p, end, string, bad);
+	}
+
+	if (struct_v >= 4) {
+		err = decode_new_pg_upmap(p, end, map);
+		if (err)
+			goto bad;
+
+		err = decode_old_pg_upmap(p, end, map);
+		if (err)
+			goto bad;
+
+		err = decode_new_pg_upmap_items(p, end, map);
+		if (err)
+			goto bad;
+
+		err = decode_old_pg_upmap_items(p, end, map);
+		if (err)
+			goto bad;
+	}
+
 	/* ignore the rest */
 	*p = end;
 
@@ -1547,12 +1868,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 void ceph_oloc_copy(struct ceph_object_locator *dest,
 		    const struct ceph_object_locator *src)
 {
-	WARN_ON(!ceph_oloc_empty(dest));
-	WARN_ON(dest->pool_ns); /* empty() only covers ->pool */
+	ceph_oloc_destroy(dest);
 
 	dest->pool = src->pool;
 	if (src->pool_ns)
 		dest->pool_ns = ceph_get_string(src->pool_ns);
+	else
+		dest->pool_ns = NULL;
 }
 EXPORT_SYMBOL(ceph_oloc_copy);
 
@@ -1565,14 +1887,15 @@ EXPORT_SYMBOL(ceph_oloc_destroy);
 void ceph_oid_copy(struct ceph_object_id *dest,
 		   const struct ceph_object_id *src)
 {
-	WARN_ON(!ceph_oid_empty(dest));
+	ceph_oid_destroy(dest);
 
 	if (src->name != src->inline_name) {
 		/* very rare, see ceph_object_id definition */
 		dest->name = kmalloc(src->name_len + 1,
 				     GFP_NOIO | __GFP_NOFAIL);
+	} else {
+		dest->name = dest->inline_name;
 	}
-
 	memcpy(dest->name, src->name, src->name_len + 1);
 	dest->name_len = src->name_len;
 }
@@ -1714,9 +2037,8 @@ void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src)
 	dest->primary = src->primary;
 }
 
-static bool is_split(const struct ceph_pg *pgid,
-		     u32 old_pg_num,
-		     u32 new_pg_num)
+bool ceph_pg_is_split(const struct ceph_pg *pgid, u32 old_pg_num,
+		      u32 new_pg_num)
 {
 	int old_bits = calc_bits_of(old_pg_num);
 	int old_mask = (1 << old_bits) - 1;
@@ -1761,7 +2083,7 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
 	       !osds_equal(old_up, new_up) ||
 	       old_size != new_size ||
 	       old_min_size != new_min_size ||
-	       is_split(pgid, old_pg_num, new_pg_num) ||
+	       ceph_pg_is_split(pgid, old_pg_num, new_pg_num) ||
 	       old_sort_bitwise != new_sort_bitwise;
 }
 
@@ -1885,16 +2207,12 @@ EXPORT_SYMBOL(ceph_calc_file_object_mapping);
  * Should only be called with target_oid and target_oloc (as opposed to
  * base_oid and base_oloc), since tiering isn't taken into account.
  */
-int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
-			      struct ceph_object_id *oid,
-			      struct ceph_object_locator *oloc,
-			      struct ceph_pg *raw_pgid)
+int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
+				const struct ceph_object_id *oid,
+				const struct ceph_object_locator *oloc,
+				struct ceph_pg *raw_pgid)
 {
-	struct ceph_pg_pool_info *pi;
-
-	pi = ceph_pg_pool_by_id(osdmap, oloc->pool);
-	if (!pi)
-		return -ENOENT;
+	WARN_ON(pi->id != oloc->pool);
 
 	if (!oloc->pool_ns) {
 		raw_pgid->pool = oloc->pool;
@@ -1926,6 +2244,20 @@ int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
 	}
 	return 0;
 }
+
+/*
+ * Look up the pool named by @oloc in @osdmap and map @oid/@oloc to a raw
+ * pgid via __ceph_object_locator_to_pg().  Returns -ENOENT if the pool
+ * is not in the map, otherwise whatever the helper returns.
+ */
+int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
+			      const struct ceph_object_id *oid,
+			      const struct ceph_object_locator *oloc,
+			      struct ceph_pg *raw_pgid)
+{
+	struct ceph_pg_pool_info *pi = ceph_pg_pool_by_id(osdmap, oloc->pool);
+
+	if (pi)
+		return __ceph_object_locator_to_pg(pi, oid, oloc, raw_pgid);
+
+	return -ENOENT;
+}
 EXPORT_SYMBOL(ceph_object_locator_to_pg);
 
 /*
@@ -1970,23 +2302,57 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi,
 
 static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
 		    int *result, int result_max,
-		    const __u32 *weight, int weight_max)
+		    const __u32 *weight, int weight_max,
+		    u64 choose_args_index)
 {
+	struct crush_choose_arg_map *arg_map;
 	int r;
 
 	BUG_ON(result_max > CEPH_PG_MAX_SIZE);
 
+	arg_map = lookup_choose_arg_map(&map->crush->choose_args,
+					choose_args_index);
+
 	mutex_lock(&map->crush_workspace_mutex);
 	r = crush_do_rule(map->crush, ruleno, x, result, result_max,
-			  weight, weight_max, map->crush_workspace);
+			  weight, weight_max, map->crush_workspace,
+			  arg_map ? arg_map->args : NULL);
 	mutex_unlock(&map->crush_workspace_mutex);
 
 	return r;
 }
 
+/*
+ * Filter OSDs for which ceph_osd_exists() is false out of @set.  If
+ * ceph_can_shift_osds() allows it for this pool, the remaining entries
+ * are compacted to the front and ->size shrinks; otherwise each such
+ * slot is overwritten with CRUSH_ITEM_NONE and ->size is unchanged.
+ */
+static void remove_nonexistent_osds(struct ceph_osdmap *osdmap,
+				    struct ceph_pg_pool_info *pi,
+				    struct ceph_osds *set)
+{
+	int src, dst;
+
+	if (!ceph_can_shift_osds(pi)) {
+		/* set dne devices to NONE */
+		for (src = 0; src < set->size; src++) {
+			if (!ceph_osd_exists(osdmap, set->osds[src]))
+				set->osds[src] = CRUSH_ITEM_NONE;
+		}
+		return;
+	}
+
+	/* shift left: dst trails src by the number of entries dropped */
+	dst = 0;
+	for (src = 0; src < set->size; src++) {
+		if (!ceph_osd_exists(osdmap, set->osds[src]))
+			continue;
+		if (dst != src)
+			set->osds[dst] = set->osds[src];
+		dst++;
+	}
+	set->size = dst;
+}
+
 /*
- * Calculate raw set (CRUSH output) for given PG.  The result may
- * contain nonexistent OSDs.  ->primary is undefined for a raw set.
+ * Calculate raw set (CRUSH output) for given PG and filter out
+ * nonexistent OSDs.  ->primary is undefined for a raw set.
  *
  * Placement seed (CRUSH input) is returned through @ppps.
  */
@@ -2020,7 +2386,7 @@ static void pg_to_raw_osds(struct ceph_osdmap *osdmap,
 	}
 
 	len = do_crush(osdmap, ruleno, pps, raw->osds, pi->size,
-		       osdmap->osd_weight, osdmap->max_osd);
+		       osdmap->osd_weight, osdmap->max_osd, pi->id);
 	if (len < 0) {
 		pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n",
 		       len, ruleno, pi->id, pi->crush_ruleset, pi->type,
@@ -2029,6 +2395,70 @@ static void pg_to_raw_osds(struct ceph_osdmap *osdmap,
 	}
 
 	raw->size = len;
+	remove_nonexistent_osds(osdmap, pi, raw);
+}
+
+/*
+ * Apply pg_upmap[_items] mappings for @pgid to the raw OSD set @raw.
+ *
+ * If a pg_upmap entry exists it replaces @raw wholesale, unless one of
+ * its targets is a zero-weight (marked out) OSD, in which case the entry
+ * is ignored.  Either way pg_upmap_items is not consulted.
+ *
+ * Otherwise every pg_upmap_items (from, to) pair is applied in order:
+ * the first occurrence of "from" in @raw is replaced with "to", provided
+ * "to" does not already appear in @raw and is not a zero-weight OSD.
+ *
+ * NOTE(review): pg_upmap.len is not checked against the capacity of
+ * raw->osds here -- confirm it is bounded elsewhere.
+ */
+static void apply_upmap(struct ceph_osdmap *osdmap,
+			const struct ceph_pg *pgid,
+			struct ceph_osds *raw)
+{
+	struct ceph_pg_mapping *pg;
+	int i, j;
+
+	pg = lookup_pg_mapping(&osdmap->pg_upmap, pgid);
+	if (pg) {
+		/* make sure targets aren't marked out */
+		for (i = 0; i < pg->pg_upmap.len; i++) {
+			int osd = pg->pg_upmap.osds[i];
+
+			if (osd != CRUSH_ITEM_NONE &&
+			    osd < osdmap->max_osd &&
+			    osdmap->osd_weight[osd] == 0) {
+				/* reject/ignore explicit mapping */
+				return;
+			}
+		}
+		for (i = 0; i < pg->pg_upmap.len; i++)
+			raw->osds[i] = pg->pg_upmap.osds[i];
+		raw->size = pg->pg_upmap.len;
+		return;
+	}
+
+	pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
+	if (pg) {
+		/*
+		 * Note: this approach does not allow a bidirectional swap,
+		 * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1].
+		 */
+		for (i = 0; i < pg->pg_upmap_items.len; i++) {
+			int from = pg->pg_upmap_items.from_to[i][0];
+			int to = pg->pg_upmap_items.from_to[i][1];
+			int pos = -1;
+			bool exists = false;
+
+			/* make sure replacement doesn't already appear */
+			for (j = 0; j < raw->size; j++) {
+				int osd = raw->osds[j];
+
+				if (osd == to) {
+					exists = true;
+					break;
+				}
+				/* ignore mapping if target is marked out */
+				if (osd == from && pos < 0 &&
+				    !(to != CRUSH_ITEM_NONE &&
+				      to < osdmap->max_osd &&
+				      osdmap->osd_weight[to] == 0)) {
+					pos = j;
+				}
+			}
+			/*
+			 * Keep going after a substitution: returning here
+			 * would silently drop every remaining pair.
+			 */
+			if (!exists && pos >= 0)
+				raw->osds[pos] = to;
+		}
+	}
 }
 
 /*
@@ -2151,18 +2581,16 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap,
  */
 static void get_temp_osds(struct ceph_osdmap *osdmap,
 			  struct ceph_pg_pool_info *pi,
-			  const struct ceph_pg *raw_pgid,
+			  const struct ceph_pg *pgid,
 			  struct ceph_osds *temp)
 {
-	struct ceph_pg pgid;
 	struct ceph_pg_mapping *pg;
 	int i;
 
-	raw_pg_to_pg(pi, raw_pgid, &pgid);
 	ceph_osds_init(temp);
 
 	/* pg_temp? */
-	pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
+	pg = lookup_pg_mapping(&osdmap->pg_temp, pgid);
 	if (pg) {
 		for (i = 0; i < pg->pg_temp.len; i++) {
 			if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) {
@@ -2185,7 +2613,7 @@ static void get_temp_osds(struct ceph_osdmap *osdmap,
 	}
 
 	/* primary_temp? */
-	pg = __lookup_pg_mapping(&osdmap->primary_temp, pgid);
+	pg = lookup_pg_mapping(&osdmap->primary_temp, pgid);
 	if (pg)
 		temp->primary = pg->primary_temp.osd;
 }
@@ -2198,43 +2626,75 @@ static void get_temp_osds(struct ceph_osdmap *osdmap,
  * resend a request.
  */
 void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
+			       struct ceph_pg_pool_info *pi,
 			       const struct ceph_pg *raw_pgid,
 			       struct ceph_osds *up,
 			       struct ceph_osds *acting)
 {
-	struct ceph_pg_pool_info *pi;
+	struct ceph_pg pgid;
 	u32 pps;
 
-	pi = ceph_pg_pool_by_id(osdmap, raw_pgid->pool);
-	if (!pi) {
-		ceph_osds_init(up);
-		ceph_osds_init(acting);
-		goto out;
-	}
+	WARN_ON(pi->id != raw_pgid->pool);
+	raw_pg_to_pg(pi, raw_pgid, &pgid);
 
 	pg_to_raw_osds(osdmap, pi, raw_pgid, up, &pps);
+	apply_upmap(osdmap, &pgid, up);
 	raw_to_up_osds(osdmap, pi, up);
 	apply_primary_affinity(osdmap, pi, pps, up);
-	get_temp_osds(osdmap, pi, raw_pgid, acting);
+	get_temp_osds(osdmap, pi, &pgid, acting);
 	if (!acting->size) {
 		memcpy(acting->osds, up->osds, up->size * sizeof(up->osds[0]));
 		acting->size = up->size;
 		if (acting->primary == -1)
 			acting->primary = up->primary;
 	}
-out:
 	WARN_ON(!osds_valid(up) || !osds_valid(acting));
 }
 
+/*
+ * Fill in @spgid (pgid plus shard) for @raw_pgid.
+ *
+ * When ceph_can_shift_osds() is true for the pool, the shard is always
+ * CEPH_SPG_NOSHARD.  Otherwise the shard is the index of the acting
+ * primary within the acting set.  Returns false, leaving @spgid
+ * untouched, if the acting primary is not found in the acting set.
+ */
+bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
+			      struct ceph_pg_pool_info *pi,
+			      const struct ceph_pg *raw_pgid,
+			      struct ceph_spg *spgid)
+{
+	struct ceph_osds up, acting;
+	struct ceph_pg pgid;
+	int shard;
+
+	WARN_ON(pi->id != raw_pgid->pool);
+	raw_pg_to_pg(pi, raw_pgid, &pgid);
+
+	if (ceph_can_shift_osds(pi)) {
+		shard = CEPH_SPG_NOSHARD;
+		goto found;
+	}
+
+	/*
+	 * NOTE(review): the already-converted pgid is passed where the
+	 * callee's parameter is named raw_pgid -- confirm this is intended.
+	 */
+	ceph_pg_to_up_acting_osds(osdmap, pi, &pgid, &up, &acting);
+	for (shard = 0; shard < acting.size; shard++) {
+		if (acting.osds[shard] == acting.primary)
+			goto found;
+	}
+
+	return false;
+
+found:
+	spgid->pgid = pgid; /* struct */
+	spgid->shard = shard;
+	return true;
+}
+
 /*
  * Return acting primary for given PG, or -1 if none.
  */
 int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
 			      const struct ceph_pg *raw_pgid)
 {
+	struct ceph_pg_pool_info *pi;
 	struct ceph_osds up, acting;
 
-	ceph_pg_to_up_acting_osds(osdmap, raw_pgid, &up, &acting);
+	pi = ceph_pg_pool_by_id(osdmap, raw_pgid->pool);
+	if (!pi)
+		return -1;
+
+	ceph_pg_to_up_acting_osds(osdmap, pi, raw_pgid, &up, &acting);
 	return acting.primary;
 }
 EXPORT_SYMBOL(ceph_pg_to_acting_primary);

diff --git a/net/core/datagram.c b/net/core/datagram.c
index 6877c43..ee5647b 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c

@@ -203,7 +203,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
 /**
  *	__skb_try_recv_datagram - Receive a datagram skbuff
  *	@sk: socket
- *	@flags: MSG_ flags
+ *	@flags: MSG\_ flags
  *	@destructor: invoked under the receive lock on successful dequeue
  *	@peeked: returns non-zero if this packet has been seen before
  *	@off: an offset in bytes to peek skb from. Returns an offset
@@ -375,7 +375,7 @@ EXPORT_SYMBOL(__sk_queue_drop_skb);
  *	skb_kill_datagram - Free a datagram skbuff forcibly
  *	@sk: socket
  *	@skb: datagram skbuff
- *	@flags: MSG_ flags
+ *	@flags: MSG\_ flags
  *
  *	This function frees a datagram skbuff that was received by
  *	skb_recv_datagram.  The flags argument must match the one
@@ -809,7 +809,7 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
  *	sequenced packet sockets providing the socket receive queue
  *	is only ever holding data ready to receive.
  *
- *	Note: when you _don't_ use this routine for this protocol,
+ *	Note: when you *don't* use this routine for this protocol,
  *	and you use a different write policy from sock_writeable()
  *	then please supply your own write_space callback.
  */

diff --git a/net/core/dev.c b/net/core/dev.c
index 0244051..8515f8f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c

@@ -7384,7 +7384,7 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 
 	BUG_ON(count < 1);
 
-	rx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT);
+	rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!rx)
 		return -ENOMEM;
 
@@ -7424,7 +7424,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 	if (count < 1 || count > 0xffff)
 		return -EINVAL;
 
-	tx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT);
+	tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!tx)
 		return -ENOMEM;
 
@@ -7965,7 +7965,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	/* ensure 32-byte alignment of whole construct */
 	alloc_size += NETDEV_ALIGN - 1;
 
-	p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_REPEAT);
+	p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!p)
 		return NULL;
 

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8b11341..f990eb8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c

@@ -4747,7 +4747,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 
 	gfp_head = gfp_mask;
 	if (gfp_head & __GFP_DIRECT_RECLAIM)
-		gfp_head |= __GFP_REPEAT;
+		gfp_head |= __GFP_RETRY_MAYFAIL;
 
 	*errcode = -ENOBUFS;
 	skb = alloc_skb(header_len, gfp_head);

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index bb909f1..06863ea 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c

@@ -2431,8 +2431,8 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
 
 	mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT);
-	if (IS_ERR(mrt)) {
-		err = PTR_ERR(mrt);
+	if (!mrt) {
+		err = -ENOENT;
 		goto errout_free;
 	}
 

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 1770c1d..e164823 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c

@@ -1003,14 +1003,10 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
 	if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
 		return NULL;
 
-	if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
-		info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
-	if (!info) {
-		info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
-				 PAGE_KERNEL);
-		if (!info)
-			return NULL;
-	}
+	info = kvmalloc(sz, GFP_KERNEL);
+	if (!info)
+		return NULL;
+
 	memset(info, 0, sizeof(*info));
 	info->size = size;
 	return info;

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 147fde7..263d16e 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c

@@ -648,7 +648,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
 		return 0;
 
 	/* If XPS was setup, we can allocate memory on right NUMA node */
-	array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_REPEAT,
+	array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_RETRY_MAYFAIL,
 			      netdev_queue_numa_node_read(sch->dev_queue));
 	if (!array)
 		return -ENOMEM;

diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index fb39284..12649c9 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c

@@ -34,6 +34,7 @@
  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  */
 
+#include <crypto/algapi.h>
 #include <crypto/hash.h>
 #include <crypto/skcipher.h>
 #include <linux/err.h>
@@ -927,7 +928,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
 	if (ret)
 		goto out_err;
 
-	if (memcmp(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
+	if (crypto_memneq(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
 		ret = GSS_S_BAD_SIG;
 		goto out_err;
 	}

diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index f0c6a8c..46b295e 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c

@@ -55,15 +55,15 @@ enum {
 #define PROC(proc, name)				\
 [GSSX_##proc] = {					\
 	.p_proc   = GSSX_##proc,			\
-	.p_encode = (kxdreproc_t)gssx_enc_##name,	\
-	.p_decode = (kxdrdproc_t)gssx_dec_##name,	\
+	.p_encode = gssx_enc_##name,	\
+	.p_decode = gssx_dec_##name,	\
 	.p_arglen = GSSX_ARG_##name##_sz,		\
 	.p_replen = GSSX_RES_##name##_sz, 		\
 	.p_statidx = GSSX_##proc,			\
 	.p_name   = #proc,				\
 }
 
-static struct rpc_procinfo gssp_procedures[] = {
+static const struct rpc_procinfo gssp_procedures[] = {
 	PROC(INDICATE_MECHS, indicate_mechs),
         PROC(GET_CALL_CONTEXT, get_call_context),
         PROC(IMPORT_AND_CANON_NAME, import_and_canon_name),
@@ -364,11 +364,12 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data)
 /*
  * Initialization stuff
  */
-
+static unsigned int gssp_version1_counts[ARRAY_SIZE(gssp_procedures)];
 static const struct rpc_version gssp_version1 = {
 	.number		= GSSPROXY_VERS_1,
 	.nrprocs	= ARRAY_SIZE(gssp_procedures),
 	.procs		= gssp_procedures,
+	.counts		= gssp_version1_counts,
 };
 
 static const struct rpc_version *gssp_version[] = {

diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 25d9a9c..c4778ca 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c

@@ -44,7 +44,7 @@ static int gssx_dec_bool(struct xdr_stream *xdr, u32 *v)
 }
 
 static int gssx_enc_buffer(struct xdr_stream *xdr,
-			   gssx_buffer *buf)
+			   const gssx_buffer *buf)
 {
 	__be32 *p;
 
@@ -56,7 +56,7 @@ static int gssx_enc_buffer(struct xdr_stream *xdr,
 }
 
 static int gssx_enc_in_token(struct xdr_stream *xdr,
-			     struct gssp_in_token *in)
+			     const struct gssp_in_token *in)
 {
 	__be32 *p;
 
@@ -130,7 +130,7 @@ static int gssx_dec_option(struct xdr_stream *xdr,
 }
 
 static int dummy_enc_opt_array(struct xdr_stream *xdr,
-				struct gssx_option_array *oa)
+				const struct gssx_option_array *oa)
 {
 	__be32 *p;
 
@@ -348,7 +348,7 @@ static int gssx_dec_status(struct xdr_stream *xdr,
 }
 
 static int gssx_enc_call_ctx(struct xdr_stream *xdr,
-			     struct gssx_call_ctx *ctx)
+			     const struct gssx_call_ctx *ctx)
 {
 	struct gssx_option opt;
 	__be32 *p;
@@ -733,8 +733,9 @@ static int gssx_enc_cb(struct xdr_stream *xdr, struct gssx_cb *cb)
 
 void gssx_enc_accept_sec_context(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 struct gssx_arg_accept_sec_context *arg)
+				 const void *data)
 {
+	const struct gssx_arg_accept_sec_context *arg = data;
 	int err;
 
 	err = gssx_enc_call_ctx(xdr, &arg->call_ctx);
@@ -789,8 +790,9 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req,
 
 int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct gssx_res_accept_sec_context *res)
+				void *data)
 {
+	struct gssx_res_accept_sec_context *res = data;
 	u32 value_follows;
 	int err;
 	struct page *scratch;

diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
index 9d88c62..146c310 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.h
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h

@@ -179,10 +179,10 @@ struct gssx_res_accept_sec_context {
 #define gssx_dec_init_sec_context NULL
 void gssx_enc_accept_sec_context(struct rpc_rqst *req,
 				 struct xdr_stream *xdr,
-				 struct gssx_arg_accept_sec_context *args);
+				 const void *data);
 int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
 				struct xdr_stream *xdr,
-				struct gssx_res_accept_sec_context *res);
+				void *data);
 #define gssx_enc_release_handle NULL
 #define gssx_dec_release_handle NULL
 #define gssx_enc_get_mic NULL

diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index a54a7a3..7b1ee5a 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c

@@ -838,6 +838,14 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
 	struct xdr_netobj mic;
 	struct xdr_buf integ_buf;
 
+	/* NFS READ normally uses splice to send data in-place. However
+	 * the data in cache can change after the reply's MIC is computed
+	 * but before the RPC reply is sent. To prevent the client from
+	 * rejecting the server-computed MIC in this somewhat rare case,
+	 * do not use splice with the GSS integrity service.
+	 */
+	clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+
 	/* Did we already verify the signature on the original pass through? */
 	if (rqstp->rq_deferred)
 		return 0;

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b5cb921..2e49d1f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c

@@ -1517,14 +1517,16 @@ static void
 call_start(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
+	int idx = task->tk_msg.rpc_proc->p_statidx;
 
 	dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
 			clnt->cl_program->name, clnt->cl_vers,
 			rpc_proc_name(task),
 			(RPC_IS_ASYNC(task) ? "async" : "sync"));
 
-	/* Increment call count */
-	task->tk_msg.rpc_proc->p_count++;
+	/* Increment call count (version might not be valid for ping) */
+	if (clnt->cl_program->version[clnt->cl_vers])
+		clnt->cl_program->version[clnt->cl_vers]->counts[idx]++;
 	clnt->cl_stats->rpccnt++;
 	task->tk_action = call_reserve;
 }
@@ -1672,7 +1674,7 @@ call_allocate(struct rpc_task *task)
 	unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack;
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = req->rq_xprt;
-	struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
+	const struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
 	int status;
 
 	dprint_status(task);
@@ -2476,16 +2478,18 @@ rpc_verify_header(struct rpc_task *task)
 	goto out_garbage;
 }
 
-static void rpcproc_encode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
+static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		const void *obj)
 {
 }
 
-static int rpcproc_decode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
+static int rpcproc_decode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+		void *obj)
 {
 	return 0;
 }
 
-static struct rpc_procinfo rpcproc_null = {
+static const struct rpc_procinfo rpcproc_null = {
 	.p_encode = rpcproc_encode_null,
 	.p_decode = rpcproc_decode_null,
 };

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 5b30603..ea0676f 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c

@@ -128,13 +128,13 @@ struct rpcbind_args {
 	int			r_status;
 };
 
-static struct rpc_procinfo rpcb_procedures2[];
-static struct rpc_procinfo rpcb_procedures3[];
-static struct rpc_procinfo rpcb_procedures4[];
+static const struct rpc_procinfo rpcb_procedures2[];
+static const struct rpc_procinfo rpcb_procedures3[];
+static const struct rpc_procinfo rpcb_procedures4[];
 
 struct rpcb_info {
 	u32			rpc_vers;
-	struct rpc_procinfo *	rpc_proc;
+	const struct rpc_procinfo *rpc_proc;
 };
 
 static const struct rpcb_info rpcb_next_version[];
@@ -620,7 +620,8 @@ int rpcb_v4_register(struct net *net, const u32 program, const u32 version,
 	return -EAFNOSUPPORT;
 }
 
-static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc)
+static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt,
+		struct rpcbind_args *map, const struct rpc_procinfo *proc)
 {
 	struct rpc_message msg = {
 		.rpc_proc = proc,
@@ -671,7 +672,7 @@ static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt)
 void rpcb_getport_async(struct rpc_task *task)
 {
 	struct rpc_clnt *clnt;
-	struct rpc_procinfo *proc;
+	const struct rpc_procinfo *proc;
 	u32 bind_version;
 	struct rpc_xprt *xprt;
 	struct rpc_clnt	*rpcb_clnt;
@@ -843,8 +844,9 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
  */
 
 static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr,
-			     const struct rpcbind_args *rpcb)
+			     const void *data)
 {
+	const struct rpcbind_args *rpcb = data;
 	__be32 *p;
 
 	dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n",
@@ -860,8 +862,9 @@ static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr,
 }
 
 static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr,
-			    struct rpcbind_args *rpcb)
+			    void *data)
 {
+	struct rpcbind_args *rpcb = data;
 	unsigned long port;
 	__be32 *p;
 
@@ -882,8 +885,9 @@ static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr,
 }
 
 static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr,
-			unsigned int *boolp)
+			void *data)
 {
+	unsigned int *boolp = data;
 	__be32 *p;
 
 	p = xdr_inline_decode(xdr, 4);
@@ -917,8 +921,9 @@ static void encode_rpcb_string(struct xdr_stream *xdr, const char *string,
 }
 
 static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
-			     const struct rpcbind_args *rpcb)
+			     const void *data)
 {
+	const struct rpcbind_args *rpcb = data;
 	__be32 *p;
 
 	dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n",
@@ -937,8 +942,9 @@ static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
 }
 
 static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
-			    struct rpcbind_args *rpcb)
+			    void *data)
 {
+	struct rpcbind_args *rpcb = data;
 	struct sockaddr_storage address;
 	struct sockaddr *sap = (struct sockaddr *)&address;
 	__be32 *p;
@@ -989,11 +995,11 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
  * since the Linux kernel RPC code requires only these.
  */
 
-static struct rpc_procinfo rpcb_procedures2[] = {
+static const struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_encode	= rpcb_enc_mapping,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
@@ -1002,8 +1008,8 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_encode	= rpcb_enc_mapping,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -1012,8 +1018,8 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 	[RPCBPROC_GETPORT] = {
 		.p_proc		= RPCBPROC_GETPORT,
-		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_getport,
+		.p_encode	= rpcb_enc_mapping,
+		.p_decode	= rpcb_dec_getport,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_getportres_sz,
 		.p_statidx	= RPCBPROC_GETPORT,
@@ -1022,11 +1028,11 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 };
 
-static struct rpc_procinfo rpcb_procedures3[] = {
+static const struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_encode	= rpcb_enc_getaddr,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
@@ -1035,8 +1041,8 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_encode	= rpcb_enc_getaddr,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -1045,8 +1051,8 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_getaddr,
+		.p_encode	= rpcb_enc_getaddr,
+		.p_decode	= rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
 		.p_statidx	= RPCBPROC_GETADDR,
@@ -1055,11 +1061,11 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 };
 
-static struct rpc_procinfo rpcb_procedures4[] = {
+static const struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_encode	= rpcb_enc_getaddr,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
@@ -1068,8 +1074,8 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	},
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
+		.p_encode	= rpcb_enc_getaddr,
+		.p_decode	= rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -1078,8 +1084,8 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	},
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
-		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrdproc_t)rpcb_dec_getaddr,
+		.p_encode	= rpcb_enc_getaddr,
+		.p_decode	= rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
 		.p_statidx	= RPCBPROC_GETADDR,
@@ -1112,22 +1118,28 @@ static const struct rpcb_info rpcb_next_version6[] = {
 	},
 };
 
+static unsigned int rpcb_version2_counts[ARRAY_SIZE(rpcb_procedures2)];
 static const struct rpc_version rpcb_version2 = {
 	.number		= RPCBVERS_2,
 	.nrprocs	= ARRAY_SIZE(rpcb_procedures2),
-	.procs		= rpcb_procedures2
+	.procs		= rpcb_procedures2,
+	.counts		= rpcb_version2_counts,
 };
 
+static unsigned int rpcb_version3_counts[ARRAY_SIZE(rpcb_procedures3)];
 static const struct rpc_version rpcb_version3 = {
 	.number		= RPCBVERS_3,
 	.nrprocs	= ARRAY_SIZE(rpcb_procedures3),
-	.procs		= rpcb_procedures3
+	.procs		= rpcb_procedures3,
+	.counts		= rpcb_version3_counts,
 };
 
+static unsigned int rpcb_version4_counts[ARRAY_SIZE(rpcb_procedures4)];
 static const struct rpc_version rpcb_version4 = {
 	.number		= RPCBVERS_4,
 	.nrprocs	= ARRAY_SIZE(rpcb_procedures4),
-	.procs		= rpcb_procedures4
+	.procs		= rpcb_procedures4,
+	.counts		= rpcb_version4_counts,
 };
 
 static const struct rpc_version *rpcb_version[] = {

diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index caeb01a..1e67133 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c

@@ -55,8 +55,7 @@ static int rpc_proc_show(struct seq_file *seq, void *v) {
 		seq_printf(seq, "proc%u %u",
 					vers->number, vers->nrprocs);
 		for (j = 0; j < vers->nrprocs; j++)
-			seq_printf(seq, " %u",
-					vers->procs[j].p_count);
+			seq_printf(seq, " %u", vers->counts[j]);
 		seq_putc(seq, '\n');
 	}
 	return 0;
@@ -78,9 +77,9 @@ static const struct file_operations rpc_proc_fops = {
 /*
  * Get RPC server stats
  */
-void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
+void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp)
+{
 	const struct svc_program *prog = statp->program;
-	const struct svc_procedure *proc;
 	const struct svc_version *vers;
 	unsigned int i, j;
 
@@ -99,11 +98,12 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
 			statp->rpcbadclnt);
 
 	for (i = 0; i < prog->pg_nvers; i++) {
-		if (!(vers = prog->pg_vers[i]) || !(proc = vers->vs_proc))
+		vers = prog->pg_vers[i];
+		if (!vers)
 			continue;
 		seq_printf(seq, "proc%d %u", i, vers->vs_nproc);
-		for (j = 0; j < vers->vs_nproc; j++, proc++)
-			seq_printf(seq, " %u", proc->pc_count);
+		for (j = 0; j < vers->vs_nproc; j++)
+			seq_printf(seq, " %u", vers->vs_count[j]);
 		seq_putc(seq, '\n');
 	}
 }
@@ -192,7 +192,7 @@ void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats)
 EXPORT_SYMBOL_GPL(rpc_count_iostats);
 
 static void _print_name(struct seq_file *seq, unsigned int op,
-			struct rpc_procinfo *procs)
+			const struct rpc_procinfo *procs)
 {
 	if (procs[op].p_name)
 		seq_printf(seq, "\t%12s: ", procs[op].p_name);

diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bc0f5a0..85ce0db 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c

@@ -1008,7 +1008,7 @@ int svc_register(const struct svc_serv *serv, struct net *net,
 		 const unsigned short port)
 {
 	struct svc_program	*progp;
-	struct svc_version	*vers;
+	const struct svc_version *vers;
 	unsigned int		i;
 	int			error = 0;
 
@@ -1151,10 +1151,9 @@ static int
 svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 {
 	struct svc_program	*progp;
-	struct svc_version	*versp = NULL;	/* compiler food */
-	struct svc_procedure	*procp = NULL;
+	const struct svc_version *versp = NULL;	/* compiler food */
+	const struct svc_procedure *procp = NULL;
 	struct svc_serv		*serv = rqstp->rq_server;
-	kxdrproc_t		xdr;
 	__be32			*statp;
 	u32			prog, vers, proc;
 	__be32			auth_stat, rpc_stat;
@@ -1166,7 +1165,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	if (argv->iov_len < 6*4)
 		goto err_short_len;
 
-	/* Will be turned off only in gss privacy case: */
+	/* Will be turned off by GSS integrity and privacy services */
 	set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
 	/* Will be turned off only when NFSv4 Sessions are used */
 	set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
@@ -1262,7 +1261,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	svc_putnl(resv, RPC_SUCCESS);
 
 	/* Bump per-procedure stats counter */
-	procp->pc_count++;
+	versp->vs_count[proc]++;
 
 	/* Initialize storage for argp and resp */
 	memset(rqstp->rq_argp, 0, procp->pc_argsize);
@@ -1276,28 +1275,30 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 
 	/* Call the function that processes the request. */
 	if (!versp->vs_dispatch) {
-		/* Decode arguments */
-		xdr = procp->pc_decode;
-		if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
+		/*
+		 * Decode arguments
+		 * XXX: why do we ignore the return value?
+		 */
+		if (procp->pc_decode &&
+		    !procp->pc_decode(rqstp, argv->iov_base))
 			goto err_garbage;
 
-		*statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
+		*statp = procp->pc_func(rqstp);
 
 		/* Encode reply */
 		if (*statp == rpc_drop_reply ||
 		    test_bit(RQ_DROPME, &rqstp->rq_flags)) {
 			if (procp->pc_release)
-				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+				procp->pc_release(rqstp);
 			goto dropit;
 		}
 		if (*statp == rpc_autherr_badcred) {
 			if (procp->pc_release)
-				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+				procp->pc_release(rqstp);
 			goto err_bad_auth;
 		}
-		if (*statp == rpc_success &&
-		    (xdr = procp->pc_encode) &&
-		    !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) {
+		if (*statp == rpc_success && procp->pc_encode &&
+		    !procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) {
 			dprintk("svc: failed to encode reply\n");
 			/* serv->sv_stats->rpcsystemerr++; */
 			*statp = rpc_system_err;
@@ -1307,7 +1308,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		if (!versp->vs_dispatch(rqstp, statp)) {
 			/* Release reply info */
 			if (procp->pc_release)
-				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+				procp->pc_release(rqstp);
 			goto dropit;
 		}
 	}
@@ -1318,7 +1319,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 
 	/* Release reply info */
 	if (procp->pc_release)
-		procp->pc_release(rqstp, NULL, rqstp->rq_resp);
+		procp->pc_release(rqstp);
 
 	if (procp->pc_encode == NULL)
 		goto dropit;

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 7bfe1fb..d16a8b4 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c

@@ -659,11 +659,13 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
 	int i;
 
 	/* now allocate needed pages.  If we get a failure, sleep briefly */
-	pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE;
-	WARN_ON_ONCE(pages >= RPCSVC_MAXPAGES);
-	if (pages >= RPCSVC_MAXPAGES)
+	pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT;
+	if (pages > RPCSVC_MAXPAGES) {
+		pr_warn_once("svc: warning: pages=%u > RPCSVC_MAXPAGES=%lu\n",
+			     pages, RPCSVC_MAXPAGES);
 		/* use as many pages as possible */
-		pages = RPCSVC_MAXPAGES - 1;
+		pages = RPCSVC_MAXPAGES;
+	}
 	for (i = 0; i < pages ; i++)
 		while (rqstp->rq_pages[i] == NULL) {
 			struct page *p = alloc_page(GFP_KERNEL);

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 3e63c5e..4654a99 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c

@@ -1047,13 +1047,15 @@ static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task
 	return ret;
 }
 
-static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags)
+static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
 {
 	struct rpc_rqst *req = ERR_PTR(-EAGAIN);
 
 	if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs))
 		goto out;
-	req = kzalloc(sizeof(struct rpc_rqst), gfp_flags);
+	spin_unlock(&xprt->reserve_lock);
+	req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS);
+	spin_lock(&xprt->reserve_lock);
 	if (req != NULL)
 		goto out;
 	atomic_dec(&xprt->num_reqs);
@@ -1081,7 +1083,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
 		list_del(&req->rq_list);
 		goto out_init_req;
 	}
-	req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT|__GFP_NOWARN);
+	req = xprt_dynamic_alloc_slot(xprt);
 	if (!IS_ERR(req))
 		goto out_init_req;
 	switch (PTR_ERR(req)) {

diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index c1ae814..b8213dd 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile

@@ -3,6 +3,6 @@
 rpcrdma-y := transport.o rpc_rdma.o verbs.o \
 	fmr_ops.o frwr_ops.o \
 	svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
-	svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
-	svc_rdma_rw.o module.o
+	svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
+	module.o
 rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o

diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 59e6402..d3f84bb 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c

@@ -91,7 +91,7 @@ __fmr_unmap(struct rpcrdma_mw *mw)
 
 	list_add(&mw->fmr.fm_mr->list, &l);
 	rc = ib_unmap_fmr(&l);
-	list_del_init(&mw->fmr.fm_mr->list);
+	list_del(&mw->fmr.fm_mr->list);
 	return rc;
 }
 
@@ -213,13 +213,11 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
 			break;
 	}
-	mw->mw_nents = i;
 	mw->mw_dir = rpcrdma_data_dir(writing);
-	if (i == 0)
-		goto out_dmamap_err;
 
-	if (!ib_dma_map_sg(r_xprt->rx_ia.ri_device,
-			   mw->mw_sg, mw->mw_nents, mw->mw_dir))
+	mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
+				     mw->mw_sg, i, mw->mw_dir);
+	if (!mw->mw_nents)
 		goto out_dmamap_err;
 
 	for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++)
@@ -237,16 +235,18 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	return mw->mw_nents;
 
 out_dmamap_err:
-	pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
-	       mw->mw_sg, mw->mw_nents);
-	rpcrdma_defer_mr_recovery(mw);
+	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
+	       mw->mw_sg, i);
+	rpcrdma_put_mw(r_xprt, mw);
 	return -EIO;
 
 out_maperr:
 	pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
 	       len, (unsigned long long)dma_pages[0],
 	       pageoff, mw->mw_nents, rc);
-	rpcrdma_defer_mr_recovery(mw);
+	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+			mw->mw_sg, mw->mw_nents, mw->mw_dir);
+	rpcrdma_put_mw(r_xprt, mw);
 	return -EIO;
 }
 
@@ -255,24 +255,26 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
  * Sleeps until it is safe for the host CPU to access the
  * previously mapped memory regions.
  *
- * Caller ensures that req->rl_registered is not empty.
+ * Caller ensures that @mws is not empty before the call. This
+ * function empties the list.
  */
 static void
-fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 {
-	struct rpcrdma_mw *mw, *tmp;
+	struct rpcrdma_mw *mw;
 	LIST_HEAD(unmap_list);
 	int rc;
 
-	dprintk("RPC:       %s: req %p\n", __func__, req);
-
 	/* ORDER: Invalidate all of the req's MRs first
 	 *
 	 * ib_unmap_fmr() is slow, so use a single call instead
 	 * of one call per mapped FMR.
 	 */
-	list_for_each_entry(mw, &req->rl_registered, mw_list)
+	list_for_each_entry(mw, mws, mw_list) {
+		dprintk("RPC:       %s: unmapping fmr %p\n",
+			__func__, &mw->fmr);
 		list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);
+	}
 	r_xprt->rx_stats.local_inv_needed++;
 	rc = ib_unmap_fmr(&unmap_list);
 	if (rc)
@@ -281,9 +283,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	/* ORDER: Now DMA unmap all of the req's MRs, and return
 	 * them to the free MW list.
 	 */
-	list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
-		list_del_init(&mw->mw_list);
-		list_del_init(&mw->fmr.fm_mr->list);
+	while (!list_empty(mws)) {
+		mw = rpcrdma_pop_mw(mws);
+		dprintk("RPC:       %s: DMA unmapping fmr %p\n",
+			__func__, &mw->fmr);
+		list_del(&mw->fmr.fm_mr->list);
 		ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
 				mw->mw_sg, mw->mw_nents, mw->mw_dir);
 		rpcrdma_put_mw(r_xprt, mw);
@@ -294,8 +298,9 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 out_reset:
 	pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
 
-	list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
-		list_del_init(&mw->fmr.fm_mr->list);
+	while (!list_empty(mws)) {
+		mw = rpcrdma_pop_mw(mws);
+		list_del(&mw->fmr.fm_mr->list);
 		fmr_op_recover_mr(mw);
 	}
 }

diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index f81dd93..6aea36a 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c

@@ -277,7 +277,7 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
 }
 
 /**
- * frwr_wc_fastreg - Invoked by RDMA provider for each polled FastReg WC
+ * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
  * @cq:	completion queue (ignored)
  * @wc:	completed WR
  *
@@ -298,7 +298,7 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
 }
 
 /**
- * frwr_wc_localinv - Invoked by RDMA provider for each polled LocalInv WC
+ * frwr_wc_localinv - Invoked by RDMA provider for a flushed LocalInv WC
  * @cq:	completion queue (ignored)
  * @wc:	completed WR
  *
@@ -319,7 +319,7 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
 }
 
 /**
- * frwr_wc_localinv - Invoked by RDMA provider for each polled LocalInv WC
+ * frwr_wc_localinv_wake - Invoked by RDMA provider for a signaled LocalInv WC
  * @cq:	completion queue (ignored)
  * @wc:	completed WR
  *
@@ -355,7 +355,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	struct ib_mr *mr;
 	struct ib_reg_wr *reg_wr;
 	struct ib_send_wr *bad_wr;
-	int rc, i, n, dma_nents;
+	int rc, i, n;
 	u8 key;
 
 	mw = NULL;
@@ -391,14 +391,10 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
 			break;
 	}
-	mw->mw_nents = i;
 	mw->mw_dir = rpcrdma_data_dir(writing);
-	if (i == 0)
-		goto out_dmamap_err;
 
-	dma_nents = ib_dma_map_sg(ia->ri_device,
-				  mw->mw_sg, mw->mw_nents, mw->mw_dir);
-	if (!dma_nents)
+	mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir);
+	if (!mw->mw_nents)
 		goto out_dmamap_err;
 
 	n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE);
@@ -436,13 +432,14 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	return mw->mw_nents;
 
 out_dmamap_err:
-	pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
-	       mw->mw_sg, mw->mw_nents);
-	rpcrdma_defer_mr_recovery(mw);
+	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
+	       mw->mw_sg, i);
+	frmr->fr_state = FRMR_IS_INVALID;
+	rpcrdma_put_mw(r_xprt, mw);
 	return -EIO;
 
 out_mapmr_err:
-	pr_err("rpcrdma: failed to map mr %p (%u/%u)\n",
+	pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
 	       frmr->fr_mr, n, mw->mw_nents);
 	rpcrdma_defer_mr_recovery(mw);
 	return -EIO;
@@ -458,21 +455,19 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
  * Sleeps until it is safe for the host CPU to access the
  * previously mapped memory regions.
  *
- * Caller ensures that req->rl_registered is not empty.
+ * Caller ensures that @mws is not empty before the call. This
+ * function empties the list.
  */
 static void
-frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
+frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
 {
 	struct ib_send_wr *first, **prev, *last, *bad_wr;
-	struct rpcrdma_rep *rep = req->rl_reply;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_frmr *f;
 	struct rpcrdma_mw *mw;
 	int count, rc;
 
-	dprintk("RPC:       %s: req %p\n", __func__, req);
-
-	/* ORDER: Invalidate all of the req's MRs first
+	/* ORDER: Invalidate all of the MRs first
 	 *
 	 * Chain the LOCAL_INV Work Requests and post them with
 	 * a single ib_post_send() call.
@@ -480,11 +475,10 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	f = NULL;
 	count = 0;
 	prev = &first;
-	list_for_each_entry(mw, &req->rl_registered, mw_list) {
+	list_for_each_entry(mw, mws, mw_list) {
 		mw->frmr.fr_state = FRMR_IS_INVALID;
 
-		if ((rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) &&
-		    (mw->mw_handle == rep->rr_inv_rkey))
+		if (mw->mw_flags & RPCRDMA_MW_F_RI)
 			continue;
 
 		f = &mw->frmr;
@@ -524,18 +518,19 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	 * unless ri_id->qp is a valid pointer.
 	 */
 	r_xprt->rx_stats.local_inv_needed++;
+	bad_wr = NULL;
 	rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
+	if (bad_wr != first)
+		wait_for_completion(&f->fr_linv_done);
 	if (rc)
 		goto reset_mrs;
 
-	wait_for_completion(&f->fr_linv_done);
-
-	/* ORDER: Now DMA unmap all of the req's MRs, and return
+	/* ORDER: Now DMA unmap all of the MRs, and return
 	 * them to the free MW list.
 	 */
 unmap:
-	while (!list_empty(&req->rl_registered)) {
-		mw = rpcrdma_pop_mw(&req->rl_registered);
+	while (!list_empty(mws)) {
+		mw = rpcrdma_pop_mw(mws);
 		dprintk("RPC:       %s: DMA unmapping frmr %p\n",
 			__func__, &mw->frmr);
 		ib_dma_unmap_sg(ia->ri_device,
@@ -546,17 +541,19 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 
 reset_mrs:
 	pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc);
-	rdma_disconnect(ia->ri_id);
 
 	/* Find and reset the MRs in the LOCAL_INV WRs that did not
-	 * get posted. This is synchronous, and slow.
+	 * get posted.
 	 */
-	list_for_each_entry(mw, &req->rl_registered, mw_list) {
-		f = &mw->frmr;
-		if (mw->mw_handle == bad_wr->ex.invalidate_rkey) {
-			__frwr_reset_mr(ia, mw);
-			bad_wr = bad_wr->next;
-		}
+	rpcrdma_init_cqcount(&r_xprt->rx_ep, -count);
+	while (bad_wr) {
+		f = container_of(bad_wr, struct rpcrdma_frmr,
+				 fr_invwr);
+		mw = container_of(f, struct rpcrdma_mw, frmr);
+
+		__frwr_reset_mr(ia, mw);
+
+		bad_wr = bad_wr->next;
 	}
 	goto unmap;
 }

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 694e9b1..ca4d6e4 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c

@@ -141,7 +141,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
 
 	if (xdr->page_len) {
 		remaining = xdr->page_len;
-		offset = xdr->page_base & ~PAGE_MASK;
+		offset = offset_in_page(xdr->page_base);
 		count = 0;
 		while (remaining) {
 			remaining -= min_t(unsigned int,
@@ -222,7 +222,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
 
 	len = xdrbuf->page_len;
 	ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
-	page_base = xdrbuf->page_base & ~PAGE_MASK;
+	page_base = offset_in_page(xdrbuf->page_base);
 	p = 0;
 	while (len && n < RPCRDMA_MAX_SEGS) {
 		if (!ppages[p]) {
@@ -540,7 +540,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
 			goto out;
 
 		page = virt_to_page(xdr->tail[0].iov_base);
-		page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
+		page_base = offset_in_page(xdr->tail[0].iov_base);
 
 		/* If the content in the page list is an odd length,
 		 * xdr_write_pages() has added a pad at the beginning
@@ -557,7 +557,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
 	 */
 	if (xdr->page_len) {
 		ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
-		page_base = xdr->page_base & ~PAGE_MASK;
+		page_base = offset_in_page(xdr->page_base);
 		remaining = xdr->page_len;
 		while (remaining) {
 			sge_no++;
@@ -587,7 +587,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
 	 */
 	if (xdr->tail[0].iov_len) {
 		page = virt_to_page(xdr->tail[0].iov_base);
-		page_base = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
+		page_base = offset_in_page(xdr->tail[0].iov_base);
 		len = xdr->tail[0].iov_len;
 
 map_tail:
@@ -734,6 +734,9 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 		rpclen = 0;
 	}
 
+	req->rl_xid = rqst->rq_xid;
+	rpcrdma_insert_req(&r_xprt->rx_buf, req);
+
 	/* This implementation supports the following combinations
 	 * of chunk lists in one RPC-over-RDMA Call message:
 	 *
@@ -875,9 +878,9 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
 	srcp += curlen;
 	copy_len -= curlen;
 
-	page_base = rqst->rq_rcv_buf.page_base;
-	ppages = rqst->rq_rcv_buf.pages + (page_base >> PAGE_SHIFT);
-	page_base &= ~PAGE_MASK;
+	ppages = rqst->rq_rcv_buf.pages +
+		(rqst->rq_rcv_buf.page_base >> PAGE_SHIFT);
+	page_base = offset_in_page(rqst->rq_rcv_buf.page_base);
 	fixup_copy_count = 0;
 	if (copy_len && rqst->rq_rcv_buf.page_len) {
 		int pagelist_len;
@@ -928,6 +931,24 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
 	return fixup_copy_count;
 }
 
+/* Mark the MW matching rep->rr_inv_rkey. Caller must keep @rep stable.
+ */
+static void
+rpcrdma_mark_remote_invalidation(struct list_head *mws,
+				 struct rpcrdma_rep *rep)
+{
+	struct rpcrdma_mw *mw;
+
+	if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE))
+		return;	/* IB_WC_WITH_INVALIDATE not set: nothing to mark */
+
+	list_for_each_entry(mw, mws, mw_list)
+		if (mw->mw_handle == rep->rr_inv_rkey) {
+			mw->mw_flags = RPCRDMA_MW_F_RI;	/* overwrites mw_flags */
+			break; /* only one invalidated MR per RPC */
+		}
+}
+
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /* By convention, backchannel calls arrive via rdma_msg type
  * messages, and never populate the chunk lists. This makes
@@ -969,14 +990,16 @@ rpcrdma_reply_handler(struct work_struct *work)
 {
 	struct rpcrdma_rep *rep =
 			container_of(work, struct rpcrdma_rep, rr_work);
+	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
 	struct rpcrdma_msg *headerp;
 	struct rpcrdma_req *req;
 	struct rpc_rqst *rqst;
-	struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
-	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
 	__be32 *iptr;
 	int rdmalen, status, rmerr;
 	unsigned long cwnd;
+	struct list_head mws;
 
 	dprintk("RPC:       %s: incoming rep %p\n", __func__, rep);
 
@@ -994,27 +1017,45 @@ rpcrdma_reply_handler(struct work_struct *work)
 	/* Match incoming rpcrdma_rep to an rpcrdma_req to
 	 * get context for handling any incoming chunks.
 	 */
-	spin_lock_bh(&xprt->transport_lock);
-	rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
-	if (!rqst)
+	spin_lock(&buf->rb_lock);
+	req = rpcrdma_lookup_req_locked(&r_xprt->rx_buf,
+					headerp->rm_xid);
+	if (!req)
 		goto out_nomatch;
-
-	req = rpcr_to_rdmar(rqst);
 	if (req->rl_reply)
 		goto out_duplicate;
 
-	/* Sanity checking has passed. We are now committed
-	 * to complete this transaction.
+	list_replace_init(&req->rl_registered, &mws);
+	rpcrdma_mark_remote_invalidation(&mws, rep);
+
+	/* Avoid races with signals and duplicate replies
+	 * by marking this req as matched.
 	 */
-	list_del_init(&rqst->rq_list);
-	spin_unlock_bh(&xprt->transport_lock);
+	req->rl_reply = rep;
+	spin_unlock(&buf->rb_lock);
+
 	dprintk("RPC:       %s: reply %p completes request %p (xid 0x%08x)\n",
 		__func__, rep, req, be32_to_cpu(headerp->rm_xid));
 
-	/* from here on, the reply is no longer an orphan */
-	req->rl_reply = rep;
-	xprt->reestablish_timeout = 0;
+	/* Invalidate and unmap the data payloads before waking the
+	 * waiting application. This guarantees the memory regions
+	 * are properly fenced from the server before the application
+	 * accesses the data. It also ensures proper send flow control:
+	 * waking the next RPC waits until this RPC has relinquished
+	 * all its Send Queue entries.
+	 */
+	if (!list_empty(&mws))
+		r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, &mws);
 
+	/* Perform XID lookup, reconstruction of the RPC reply, and
+	 * RPC completion while holding the transport lock to ensure
+	 * the rep, rqst, and rq_task pointers remain stable.
+	 */
+	spin_lock_bh(&xprt->transport_lock);
+	rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
+	if (!rqst)
+		goto out_norqst;
+	xprt->reestablish_timeout = 0;
 	if (headerp->rm_vers != rpcrdma_version)
 		goto out_badversion;
 
@@ -1024,12 +1065,9 @@ rpcrdma_reply_handler(struct work_struct *work)
 	case rdma_msg:
 		/* never expect read chunks */
 		/* never expect reply chunks (two ways to check) */
-		/* never expect write chunks without having offered RDMA */
 		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
 		    (headerp->rm_body.rm_chunks[1] == xdr_zero &&
-		     headerp->rm_body.rm_chunks[2] != xdr_zero) ||
-		    (headerp->rm_body.rm_chunks[1] != xdr_zero &&
-		     list_empty(&req->rl_registered)))
+		     headerp->rm_body.rm_chunks[2] != xdr_zero))
 			goto badheader;
 		if (headerp->rm_body.rm_chunks[1] != xdr_zero) {
 			/* count any expected write chunks in read reply */
@@ -1066,8 +1104,7 @@ rpcrdma_reply_handler(struct work_struct *work)
 		/* never expect read or write chunks, always reply chunks */
 		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
 		    headerp->rm_body.rm_chunks[1] != xdr_zero ||
-		    headerp->rm_body.rm_chunks[2] != xdr_one ||
-		    list_empty(&req->rl_registered))
+		    headerp->rm_body.rm_chunks[2] != xdr_one)
 			goto badheader;
 		iptr = (__be32 *)((unsigned char *)headerp +
 							RPCRDMA_HDRLEN_MIN);
@@ -1093,17 +1130,6 @@ rpcrdma_reply_handler(struct work_struct *work)
 	}
 
 out:
-	/* Invalidate and flush the data payloads before waking the
-	 * waiting application. This guarantees the memory region is
-	 * properly fenced from the server before the application
-	 * accesses the data. It also ensures proper send flow
-	 * control: waking the next RPC waits until this RPC has
-	 * relinquished all its Send Queue entries.
-	 */
-	if (!list_empty(&req->rl_registered))
-		r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
-
-	spin_lock_bh(&xprt->transport_lock);
 	cwnd = xprt->cwnd;
 	xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
 	if (xprt->cwnd > cwnd)
@@ -1112,7 +1138,7 @@ rpcrdma_reply_handler(struct work_struct *work)
 	xprt_complete_rqst(rqst->rq_task, status);
 	spin_unlock_bh(&xprt->transport_lock);
 	dprintk("RPC:       %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
-			__func__, xprt, rqst, status);
+		__func__, xprt, rqst, status);
 	return;
 
 out_badstatus:
@@ -1161,26 +1187,37 @@ rpcrdma_reply_handler(struct work_struct *work)
 	r_xprt->rx_stats.bad_reply_count++;
 	goto out;
 
-/* If no pending RPC transaction was matched, post a replacement
- * receive buffer before returning.
+/* The req was still available, but by the time the transport_lock
+ * was acquired, the rqst and task had been released. Thus the RPC
+ * has already been terminated.
  */
+out_norqst:
+	spin_unlock_bh(&xprt->transport_lock);
+	rpcrdma_buffer_put(req);
+	dprintk("RPC:       %s: race, no rqst left for req %p\n",
+		__func__, req);
+	return;
+
 out_shortreply:
 	dprintk("RPC:       %s: short/invalid reply\n", __func__);
 	goto repost;
 
 out_nomatch:
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&buf->rb_lock);
 	dprintk("RPC:       %s: no match for incoming xid 0x%08x len %d\n",
 		__func__, be32_to_cpu(headerp->rm_xid),
 		rep->rr_len);
 	goto repost;
 
 out_duplicate:
-	spin_unlock_bh(&xprt->transport_lock);
+	spin_unlock(&buf->rb_lock);
 	dprintk("RPC:       %s: "
 		"duplicate reply %p to RPC request %p: xid 0x%08x\n",
 		__func__, rep, req, be32_to_cpu(headerp->rm_xid));
 
+/* If no pending RPC transaction was matched, post a replacement
+ * receive buffer before returning.
+ */
 repost:
 	r_xprt->rx_stats.bad_reply_count++;
 	if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep))

diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
deleted file mode 100644
index bdcf7d8..0000000
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ /dev/null

@@ -1,168 +0,0 @@
-/*
- * Copyright (c) 2016 Oracle. All rights reserved.
- * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the BSD-type
- * license below:
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *      Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *
- *      Redistributions in binary form must reproduce the above
- *      copyright notice, this list of conditions and the following
- *      disclaimer in the documentation and/or other materials provided
- *      with the distribution.
- *
- *      Neither the name of the Network Appliance, Inc. nor the names of
- *      its contributors may be used to endorse or promote products
- *      derived from this software without specific prior written
- *      permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Tom Tucker <tom@opengridcomputing.com>
- */
-
-#include <linux/sunrpc/xdr.h>
-#include <linux/sunrpc/debug.h>
-#include <asm/unaligned.h>
-#include <linux/sunrpc/rpc_rdma.h>
-#include <linux/sunrpc/svc_rdma.h>
-
-#define RPCDBG_FACILITY	RPCDBG_SVCXPRT
-
-static __be32 *xdr_check_read_list(__be32 *p, __be32 *end)
-{
-	__be32 *next;
-
-	while (*p++ != xdr_zero) {
-		next = p + rpcrdma_readchunk_maxsz - 1;
-		if (next > end)
-			return NULL;
-		p = next;
-	}
-	return p;
-}
-
-static __be32 *xdr_check_write_list(__be32 *p, __be32 *end)
-{
-	__be32 *next;
-
-	while (*p++ != xdr_zero) {
-		next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
-		if (next > end)
-			return NULL;
-		p = next;
-	}
-	return p;
-}
-
-static __be32 *xdr_check_reply_chunk(__be32 *p, __be32 *end)
-{
-	__be32 *next;
-
-	if (*p++ != xdr_zero) {
-		next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
-		if (next > end)
-			return NULL;
-		p = next;
-	}
-	return p;
-}
-
-/**
- * svc_rdma_xdr_decode_req - Parse incoming RPC-over-RDMA header
- * @rq_arg: Receive buffer
- *
- * On entry, xdr->head[0].iov_base points to first byte in the
- * RPC-over-RDMA header.
- *
- * On successful exit, head[0] points to first byte past the
- * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
- * The length of the RPC-over-RDMA header is returned.
- */
-int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
-{
-	__be32 *p, *end, *rdma_argp;
-	unsigned int hdr_len;
-
-	/* Verify that there's enough bytes for header + something */
-	if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
-		goto out_short;
-
-	rdma_argp = rq_arg->head[0].iov_base;
-	if (*(rdma_argp + 1) != rpcrdma_version)
-		goto out_version;
-
-	switch (*(rdma_argp + 3)) {
-	case rdma_msg:
-	case rdma_nomsg:
-		break;
-
-	case rdma_done:
-		goto out_drop;
-
-	case rdma_error:
-		goto out_drop;
-
-	default:
-		goto out_proc;
-	}
-
-	end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
-	p = xdr_check_read_list(rdma_argp + 4, end);
-	if (!p)
-		goto out_inval;
-	p = xdr_check_write_list(p, end);
-	if (!p)
-		goto out_inval;
-	p = xdr_check_reply_chunk(p, end);
-	if (!p)
-		goto out_inval;
-	if (p > end)
-		goto out_inval;
-
-	rq_arg->head[0].iov_base = p;
-	hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
-	rq_arg->head[0].iov_len -= hdr_len;
-	return hdr_len;
-
-out_short:
-	dprintk("svcrdma: header too short = %d\n", rq_arg->len);
-	return -EINVAL;
-
-out_version:
-	dprintk("svcrdma: bad xprt version: %u\n",
-		be32_to_cpup(rdma_argp + 1));
-	return -EPROTONOSUPPORT;
-
-out_drop:
-	dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n");
-	return 0;
-
-out_proc:
-	dprintk("svcrdma: bad rdma procedure (%u)\n",
-		be32_to_cpup(rdma_argp + 3));
-	return -EINVAL;
-
-out_inval:
-	dprintk("svcrdma: failed to parse transport header\n");
-	return -EINVAL;
-}

diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 27a99bf..ad4bd62 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c

@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2016, 2017 Oracle. All rights reserved.
  * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
  *
@@ -40,12 +41,66 @@
  * Author: Tom Tucker <tom@opengridcomputing.com>
  */
 
-#include <linux/sunrpc/debug.h>
-#include <linux/sunrpc/rpc_rdma.h>
-#include <linux/spinlock.h>
+/* Operation
+ *
+ * The main entry point is svc_rdma_recvfrom. This is called from
+ * svc_recv when the transport indicates there is incoming data to
+ * be read. "Data Ready" is signaled when an RDMA Receive completes,
+ * or when a set of RDMA Reads complete.
+ *
+ * An svc_rqst is passed in. This structure contains an array of
+ * free pages (rq_pages) that will contain the incoming RPC message.
+ *
+ * Short messages are moved directly into svc_rqst::rq_arg, and
+ * the RPC Call is ready to be processed by the Upper Layer.
+ * svc_rdma_recvfrom returns the length of the RPC Call message,
+ * completing the reception of the RPC Call.
+ *
+ * However, when an incoming message has Read chunks,
+ * svc_rdma_recvfrom must post RDMA Reads to pull the RPC Call's
+ * data payload from the client. svc_rdma_recvfrom sets up the
+ * RDMA Reads using pages in svc_rqst::rq_pages, which are
+ * transferred to an svc_rdma_op_ctxt for the duration of the
+ * I/O. svc_rdma_recvfrom then returns zero, since the RPC message
+ * is still not yet ready.
+ *
+ * When the Read chunk payloads have become available on the
+ * server, "Data Ready" is raised again, and svc_recv calls
+ * svc_rdma_recvfrom again. This second call may use a different
+ * svc_rqst than the first one, thus any information that needs
+ * to be preserved across these two calls is kept in an
+ * svc_rdma_op_ctxt.
+ *
+ * The second call to svc_rdma_recvfrom performs final assembly
+ * of the RPC Call message, using the RDMA Read sink pages kept in
+ * the svc_rdma_op_ctxt. The xdr_buf is copied from the
+ * svc_rdma_op_ctxt to the second svc_rqst. The second call returns
+ * the length of the completed RPC Call message.
+ *
+ * Page Management
+ *
+ * Pages under I/O must be transferred from the first svc_rqst to an
+ * svc_rdma_op_ctxt before the first svc_rdma_recvfrom call returns.
+ *
+ * The first svc_rqst supplies pages for RDMA Reads. These are moved
+ * from rqstp::rq_pages into ctxt::pages. The consumed elements of
+ * the rq_pages array are set to NULL and refilled when the first
+ * svc_rdma_recvfrom call returns.
+ *
+ * During the second svc_rdma_recvfrom call, RDMA Read sink pages
+ * are transferred from the svc_rdma_op_ctxt to the second svc_rqst
+ * (see rdma_read_complete() below).
+ */
+
 #include <asm/unaligned.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
+
+#include <linux/spinlock.h>
+
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/debug.h>
+#include <linux/sunrpc/rpc_rdma.h>
 #include <linux/sunrpc/svc_rdma.h>
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
@@ -59,7 +114,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 			       struct svc_rdma_op_ctxt *ctxt,
 			       u32 byte_count)
 {
-	struct rpcrdma_msg *rmsgp;
 	struct page *page;
 	u32 bc;
 	int sge_no;
@@ -83,20 +137,12 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 	rqstp->rq_arg.page_len = bc;
 	rqstp->rq_arg.page_base = 0;
 
-	/* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
-	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
-	if (rmsgp->rm_type == rdma_nomsg)
-		rqstp->rq_arg.pages = &rqstp->rq_pages[0];
-	else
-		rqstp->rq_arg.pages = &rqstp->rq_pages[1];
-
 	sge_no = 1;
 	while (bc && sge_no < ctxt->count) {
 		page = ctxt->pages[sge_no];
 		put_page(rqstp->rq_pages[sge_no]);
 		rqstp->rq_pages[sge_no] = page;
 		bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
-		rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
 		sge_no++;
 	}
 	rqstp->rq_respages = &rqstp->rq_pages[sge_no];
@@ -115,406 +161,208 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 	rqstp->rq_arg.tail[0].iov_len = 0;
 }
 
-/* Issue an RDMA_READ using the local lkey to map the data sink */
-int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
-			struct svc_rqst *rqstp,
-			struct svc_rdma_op_ctxt *head,
-			int *page_no,
-			u32 *page_offset,
-			u32 rs_handle,
-			u32 rs_length,
-			u64 rs_offset,
-			bool last)
-{
-	struct ib_rdma_wr read_wr;
-	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
-	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
-	int ret, read, pno;
-	u32 pg_off = *page_offset;
-	u32 pg_no = *page_no;
-
-	ctxt->direction = DMA_FROM_DEVICE;
-	ctxt->read_hdr = head;
-	pages_needed = min_t(int, pages_needed, xprt->sc_max_sge_rd);
-	read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset,
-		     rs_length);
-
-	for (pno = 0; pno < pages_needed; pno++) {
-		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
-
-		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
-		head->arg.page_len += len;
-
-		head->arg.len += len;
-		if (!pg_off)
-			head->count++;
-		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
-		rqstp->rq_next_page = rqstp->rq_respages + 1;
-		ctxt->sge[pno].addr =
-			ib_dma_map_page(xprt->sc_cm_id->device,
-					head->arg.pages[pg_no], pg_off,
-					PAGE_SIZE - pg_off,
-					DMA_FROM_DEVICE);
-		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
-					   ctxt->sge[pno].addr);
-		if (ret)
-			goto err;
-		svc_rdma_count_mappings(xprt, ctxt);
-
-		ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
-		ctxt->sge[pno].length = len;
-		ctxt->count++;
-
-		/* adjust offset and wrap to next page if needed */
-		pg_off += len;
-		if (pg_off == PAGE_SIZE) {
-			pg_off = 0;
-			pg_no++;
-		}
-		rs_length -= len;
-	}
-
-	if (last && rs_length == 0)
-		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-	else
-		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-
-	memset(&read_wr, 0, sizeof(read_wr));
-	ctxt->cqe.done = svc_rdma_wc_read;
-	read_wr.wr.wr_cqe = &ctxt->cqe;
-	read_wr.wr.opcode = IB_WR_RDMA_READ;
-	read_wr.wr.send_flags = IB_SEND_SIGNALED;
-	read_wr.rkey = rs_handle;
-	read_wr.remote_addr = rs_offset;
-	read_wr.wr.sg_list = ctxt->sge;
-	read_wr.wr.num_sge = pages_needed;
-
-	ret = svc_rdma_send(xprt, &read_wr.wr);
-	if (ret) {
-		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
-		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-		goto err;
-	}
-
-	/* return current location in page array */
-	*page_no = pg_no;
-	*page_offset = pg_off;
-	ret = read;
-	atomic_inc(&rdma_stat_read);
-	return ret;
- err:
-	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_context(ctxt, 0);
-	return ret;
-}
-
-/* Issue an RDMA_READ using an FRMR to map the data sink */
-int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
-			 struct svc_rqst *rqstp,
-			 struct svc_rdma_op_ctxt *head,
-			 int *page_no,
-			 u32 *page_offset,
-			 u32 rs_handle,
-			 u32 rs_length,
-			 u64 rs_offset,
-			 bool last)
-{
-	struct ib_rdma_wr read_wr;
-	struct ib_send_wr inv_wr;
-	struct ib_reg_wr reg_wr;
-	u8 key;
-	int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
-	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
-	struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
-	int ret, read, pno, dma_nents, n;
-	u32 pg_off = *page_offset;
-	u32 pg_no = *page_no;
-
-	if (IS_ERR(frmr))
-		return -ENOMEM;
-
-	ctxt->direction = DMA_FROM_DEVICE;
-	ctxt->frmr = frmr;
-	nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len);
-	read = min_t(int, (nents << PAGE_SHIFT) - *page_offset, rs_length);
-
-	frmr->direction = DMA_FROM_DEVICE;
-	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
-	frmr->sg_nents = nents;
-
-	for (pno = 0; pno < nents; pno++) {
-		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
-
-		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
-		head->arg.page_len += len;
-		head->arg.len += len;
-		if (!pg_off)
-			head->count++;
-
-		sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no],
-			    len, pg_off);
-
-		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
-		rqstp->rq_next_page = rqstp->rq_respages + 1;
-
-		/* adjust offset and wrap to next page if needed */
-		pg_off += len;
-		if (pg_off == PAGE_SIZE) {
-			pg_off = 0;
-			pg_no++;
-		}
-		rs_length -= len;
-	}
-
-	if (last && rs_length == 0)
-		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-	else
-		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-
-	dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device,
-				  frmr->sg, frmr->sg_nents,
-				  frmr->direction);
-	if (!dma_nents) {
-		pr_err("svcrdma: failed to dma map sg %p\n",
-		       frmr->sg);
-		return -ENOMEM;
-	}
-
-	n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, NULL, PAGE_SIZE);
-	if (unlikely(n != frmr->sg_nents)) {
-		pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n",
-		       frmr->mr, n, frmr->sg_nents);
-		return n < 0 ? n : -EINVAL;
-	}
-
-	/* Bump the key */
-	key = (u8)(frmr->mr->lkey & 0x000000FF);
-	ib_update_fast_reg_key(frmr->mr, ++key);
-
-	ctxt->sge[0].addr = frmr->mr->iova;
-	ctxt->sge[0].lkey = frmr->mr->lkey;
-	ctxt->sge[0].length = frmr->mr->length;
-	ctxt->count = 1;
-	ctxt->read_hdr = head;
-
-	/* Prepare REG WR */
-	ctxt->reg_cqe.done = svc_rdma_wc_reg;
-	reg_wr.wr.wr_cqe = &ctxt->reg_cqe;
-	reg_wr.wr.opcode = IB_WR_REG_MR;
-	reg_wr.wr.send_flags = IB_SEND_SIGNALED;
-	reg_wr.wr.num_sge = 0;
-	reg_wr.mr = frmr->mr;
-	reg_wr.key = frmr->mr->lkey;
-	reg_wr.access = frmr->access_flags;
-	reg_wr.wr.next = &read_wr.wr;
-
-	/* Prepare RDMA_READ */
-	memset(&read_wr, 0, sizeof(read_wr));
-	ctxt->cqe.done = svc_rdma_wc_read;
-	read_wr.wr.wr_cqe = &ctxt->cqe;
-	read_wr.wr.send_flags = IB_SEND_SIGNALED;
-	read_wr.rkey = rs_handle;
-	read_wr.remote_addr = rs_offset;
-	read_wr.wr.sg_list = ctxt->sge;
-	read_wr.wr.num_sge = 1;
-	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
-		read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
-		read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
-	} else {
-		read_wr.wr.opcode = IB_WR_RDMA_READ;
-		read_wr.wr.next = &inv_wr;
-		/* Prepare invalidate */
-		memset(&inv_wr, 0, sizeof(inv_wr));
-		ctxt->inv_cqe.done = svc_rdma_wc_inv;
-		inv_wr.wr_cqe = &ctxt->inv_cqe;
-		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
-		inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
-	}
-
-	/* Post the chain */
-	ret = svc_rdma_send(xprt, &reg_wr.wr);
-	if (ret) {
-		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
-		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-		goto err;
-	}
-
-	/* return current location in page array */
-	*page_no = pg_no;
-	*page_offset = pg_off;
-	ret = read;
-	atomic_inc(&rdma_stat_read);
-	return ret;
- err:
-	svc_rdma_put_context(ctxt, 0);
-	svc_rdma_put_frmr(xprt, frmr);
-	return ret;
-}
-
-static unsigned int
-rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch)
-{
-	unsigned int count;
-
-	for (count = 0; ch->rc_discrim != xdr_zero; ch++)
-		count++;
-	return count;
-}
-
-/* If there was additional inline content, append it to the end of arg.pages.
- * Tail copy has to be done after the reader function has determined how many
- * pages are needed for RDMA READ.
+/* This accommodates the largest possible Write chunk,
+ * in one segment.
  */
-static int
-rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head,
-	       u32 position, u32 byte_count, u32 page_offset, int page_no)
-{
-	char *srcp, *destp;
+#define MAX_BYTES_WRITE_SEG	((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))
 
-	srcp = head->arg.head[0].iov_base + position;
-	byte_count = head->arg.head[0].iov_len - position;
-	if (byte_count > PAGE_SIZE) {
-		dprintk("svcrdma: large tail unsupported\n");
-		return 0;
-	}
-
-	/* Fit as much of the tail on the current page as possible */
-	if (page_offset != PAGE_SIZE) {
-		destp = page_address(rqstp->rq_arg.pages[page_no]);
-		destp += page_offset;
-		while (byte_count--) {
-			*destp++ = *srcp++;
-			page_offset++;
-			if (page_offset == PAGE_SIZE && byte_count)
-				goto more;
-		}
-		goto done;
-	}
-
-more:
-	/* Fit the rest on the next page */
-	page_no++;
-	destp = page_address(rqstp->rq_arg.pages[page_no]);
-	while (byte_count--)
-		*destp++ = *srcp++;
-
-	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
-	rqstp->rq_next_page = rqstp->rq_respages + 1;
-
-done:
-	byte_count = head->arg.head[0].iov_len - position;
-	head->arg.page_len += byte_count;
-	head->arg.len += byte_count;
-	head->arg.buflen += byte_count;
-	return 1;
-}
-
-/* Returns the address of the first read chunk or <nul> if no read chunk
- * is present
+/* This accommodates the largest possible Position-Zero
+ * Read chunk or Reply chunk, in one segment.
  */
-static struct rpcrdma_read_chunk *
-svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
-{
-	struct rpcrdma_read_chunk *ch =
-		(struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+#define MAX_BYTES_SPECIAL_SEG	((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))
 
-	if (ch->rc_discrim == xdr_zero)
-		return NULL;
-	return ch;
-}
-
-static int rdma_read_chunks(struct svcxprt_rdma *xprt,
-			    struct rpcrdma_msg *rmsgp,
-			    struct svc_rqst *rqstp,
-			    struct svc_rdma_op_ctxt *head)
+/* Sanity check the Read list.
+ *
+ * Implementation limits:
+ * - This implementation supports only one Read chunk.
+ *
+ * Sanity checks:
+ * - Read list does not overflow buffer.
+ * - Segment size limited by largest NFS data payload.
+ *
+ * The segment count is limited to how many segments can
+ * fit in the transport header without overflowing the
+ * buffer. That's about 40 Read segments for a 1KB inline
+ * threshold.
+ *
+ * Returns pointer to the following Write list.
+ */
+static __be32 *xdr_check_read_list(__be32 *p, const __be32 *end)
 {
-	int page_no, ret;
-	struct rpcrdma_read_chunk *ch;
-	u32 handle, page_offset, byte_count;
 	u32 position;
-	u64 rs_offset;
-	bool last;
+	bool first;
 
-	/* If no read list is present, return 0 */
-	ch = svc_rdma_get_read_chunk(rmsgp);
-	if (!ch)
-		return 0;
-
-	if (rdma_rcl_chunk_count(ch) > RPCSVC_MAXPAGES)
-		return -EINVAL;
-
-	/* The request is completed when the RDMA_READs complete. The
-	 * head context keeps all the pages that comprise the
-	 * request.
-	 */
-	head->arg.head[0] = rqstp->rq_arg.head[0];
-	head->arg.tail[0] = rqstp->rq_arg.tail[0];
-	head->hdr_count = head->count;
-	head->arg.page_base = 0;
-	head->arg.page_len = 0;
-	head->arg.len = rqstp->rq_arg.len;
-	head->arg.buflen = rqstp->rq_arg.buflen;
-
-	/* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
-	position = be32_to_cpu(ch->rc_position);
-	if (position == 0) {
-		head->arg.pages = &head->pages[0];
-		page_offset = head->byte_len;
-	} else {
-		head->arg.pages = &head->pages[head->count];
-		page_offset = 0;
-	}
-
-	ret = 0;
-	page_no = 0;
-	for (; ch->rc_discrim != xdr_zero; ch++) {
-		if (be32_to_cpu(ch->rc_position) != position)
-			goto err;
-
-		handle = be32_to_cpu(ch->rc_target.rs_handle),
-		byte_count = be32_to_cpu(ch->rc_target.rs_length);
-		xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
-				 &rs_offset);
-
-		while (byte_count > 0) {
-			last = (ch + 1)->rc_discrim == xdr_zero;
-			ret = xprt->sc_reader(xprt, rqstp, head,
-					      &page_no, &page_offset,
-					      handle, byte_count,
-					      rs_offset, last);
-			if (ret < 0)
-				goto err;
-			byte_count -= ret;
-			rs_offset += ret;
-			head->arg.buflen += ret;
+	first = true;
+	while (*p++ != xdr_zero) {
+		if (first) {
+			position = be32_to_cpup(p++);
+			first = false;
+		} else if (be32_to_cpup(p++) != position) {
+			return NULL;
 		}
+		p++;	/* handle */
+		if (be32_to_cpup(p++) > MAX_BYTES_SPECIAL_SEG)
+			return NULL;
+		p += 2;	/* offset */
+
+		if (p > end)
+			return NULL;
+	}
+	return p;
+}
+
+/* The segment count is limited to how many segments can
+ * fit in the transport header without overflowing the
+ * buffer. That's about 60 Write segments for a 1KB inline
+ * threshold.
+ */
+static __be32 *xdr_check_write_chunk(__be32 *p, const __be32 *end,
+				     u32 maxlen)
+{
+	u32 i, segcount;
+
+	segcount = be32_to_cpup(p++);
+	for (i = 0; i < segcount; i++) {
+		p++;	/* handle */
+		if (be32_to_cpup(p++) > maxlen)
+			return NULL;
+		p += 2;	/* offset */
+
+		if (p > end)
+			return NULL;
 	}
 
-	/* Read list may need XDR round-up (see RFC 5666, s. 3.7) */
-	if (page_offset & 3) {
-		u32 pad = 4 - (page_offset & 3);
+	return p;
+}
 
-		head->arg.tail[0].iov_len += pad;
-		head->arg.len += pad;
-		head->arg.buflen += pad;
-		page_offset += pad;
+/* Sanity check the Write list.
+ *
+ * Implementation limits:
+ * - This implementation supports only one Write chunk; a list
+ *   carrying a second chunk is rejected.
+ * Sanity checks:
+ * - Write list does not overflow buffer.
+ * - Segment size limited by largest NFS data payload.
+ *
+ * Returns pointer to the following Reply chunk, or NULL on failure.
+ */
+static __be32 *xdr_check_write_list(__be32 *p, const __be32 *end)
+{
+	u32 chcount;
+
+	chcount = 0;
+	while (*p++ != xdr_zero) {	/* non-zero word: a chunk follows */
+		p = xdr_check_write_chunk(p, end, MAX_BYTES_WRITE_SEG);
+		if (!p)
+			return NULL;
+		if (++chcount > 1)	/* enforce the one-chunk limit */
+			return NULL;
+	}
+	return p;
+}
+
+/* Sanity check the Reply chunk.
+ *
+ * Sanity checks:
+ * - Reply chunk does not overflow buffer.
+ * - Segment size limited by MAX_BYTES_SPECIAL_SEG.
+ *
+ * Returns pointer to the following RPC header, or NULL if a check fails.
+ */
+static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end)
+{
+	if (*p++ != xdr_zero) {	/* non-zero word: a Reply chunk follows */
+		p = xdr_check_write_chunk(p, end, MAX_BYTES_SPECIAL_SEG);
+		if (!p)
+			return NULL;
+	}
+	return p;
+}
+
+/* On entry, xdr->head[0].iov_base points to first byte in the
+ * RPC-over-RDMA header.
+ *
+ * On successful exit, head[0] points to first byte past the
+ * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
+ * The length of the RPC-over-RDMA header is returned.
+ *
+ * Assumptions:
+ * - The transport header is entirely contained in the head iovec.
+ */
+static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
+{
+	__be32 *p, *end, *rdma_argp;
+	unsigned int hdr_len;
+	char *proc;
+
+	/* Verify that there's enough bytes for header + something */
+	if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
+		goto out_short;
+
+	rdma_argp = rq_arg->head[0].iov_base;
+	if (*(rdma_argp + 1) != rpcrdma_version)
+		goto out_version;
+
+	switch (*(rdma_argp + 3)) {
+	case rdma_msg:
+		proc = "RDMA_MSG";
+		break;
+	case rdma_nomsg:
+		proc = "RDMA_NOMSG";
+		break;
+
+	case rdma_done:
+		goto out_drop;
+
+	case rdma_error:
+		goto out_drop;
+
+	default:
+		goto out_proc;
 	}
 
-	ret = 1;
-	if (position && position < head->arg.head[0].iov_len)
-		ret = rdma_copy_tail(rqstp, head, position,
-				     byte_count, page_offset, page_no);
-	head->arg.head[0].iov_len = position;
-	head->position = position;
+	end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
+	p = xdr_check_read_list(rdma_argp + 4, end);
+	if (!p)
+		goto out_inval;
+	p = xdr_check_write_list(p, end);
+	if (!p)
+		goto out_inval;
+	p = xdr_check_reply_chunk(p, end);
+	if (!p)
+		goto out_inval;
+	if (p > end)
+		goto out_inval;
 
- err:
-	/* Detach arg pages. svc_recv will replenish them */
-	for (page_no = 0;
-	     &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++)
-		rqstp->rq_pages[page_no] = NULL;
+	rq_arg->head[0].iov_base = p;
+	hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
+	rq_arg->head[0].iov_len -= hdr_len;
+	rq_arg->len -= hdr_len;
+	dprintk("svcrdma: received %s request for XID 0x%08x, hdr_len=%u\n",
+		proc, be32_to_cpup(rdma_argp), hdr_len);
+	return hdr_len;
 
-	return ret;
+out_short:
+	dprintk("svcrdma: header too short = %d\n", rq_arg->len);
+	return -EINVAL;
+
+out_version:
+	dprintk("svcrdma: bad xprt version: %u\n",
+		be32_to_cpup(rdma_argp + 1));
+	return -EPROTONOSUPPORT;
+
+out_drop:
+	dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n");
+	return 0;
+
+out_proc:
+	dprintk("svcrdma: bad rdma procedure (%u)\n",
+		be32_to_cpup(rdma_argp + 3));
+	return -EINVAL;
+
+out_inval:
+	dprintk("svcrdma: failed to parse transport header\n");
+	return -EINVAL;
 }
 
 static void rdma_read_complete(struct svc_rqst *rqstp,
@@ -528,24 +376,9 @@ static void rdma_read_complete(struct svc_rqst *rqstp,
 		rqstp->rq_pages[page_no] = head->pages[page_no];
 	}
 
-	/* Adjustments made for RDMA_NOMSG type requests */
-	if (head->position == 0) {
-		if (head->arg.len <= head->sge[0].length) {
-			head->arg.head[0].iov_len = head->arg.len -
-							head->byte_len;
-			head->arg.page_len = 0;
-		} else {
-			head->arg.head[0].iov_len = head->sge[0].length -
-								head->byte_len;
-			head->arg.page_len = head->arg.len -
-						head->sge[0].length;
-		}
-	}
-
 	/* Point rq_arg.pages past header */
 	rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
 	rqstp->rq_arg.page_len = head->arg.page_len;
-	rqstp->rq_arg.page_base = head->arg.page_base;
 
 	/* rq_respages starts after the last arg page */
 	rqstp->rq_respages = &rqstp->rq_pages[page_no];
@@ -642,21 +475,44 @@ static bool svc_rdma_is_backchannel_reply(struct svc_xprt *xprt,
 	return true;
 }
 
-/*
- * Set up the rqstp thread context to point to the RQ buffer. If
- * necessary, pull additional data from the client with an RDMA_READ
- * request.
+/**
+ * svc_rdma_recvfrom - Receive an RPC call
+ * @rqstp: request structure into which to receive an RPC Call
+ *
+ * Returns:
+ *	The positive number of bytes in the RPC Call message,
+ *	%0 if there were no Calls ready to return,
+ *	%-EINVAL if the Read chunk data is too large,
+ *	%-ENOMEM if rdma_rw context pool was exhausted,
+ *	%-ENOTCONN if posting failed (connection is lost),
+ *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
+ *
+ * Called in a loop when XPT_DATA is set. XPT_DATA is cleared only
+ * when there are no remaining ctxt's to process.
+ *
+ * The next ctxt is removed from the "receive" lists.
+ *
+ * - If the ctxt completes a Read, then finish assembling the Call
+ *   message and return the number of bytes in the message.
+ *
+ * - If the ctxt completes a Receive, then construct the Call
+ *   message from the contents of the Receive buffer.
+ *
+ *   - If there are no Read chunks in this message, then finish
+ *     assembling the Call message and return the number of bytes
+ *     in the message.
+ *
+ *   - If there are Read chunks in this message, post Read WRs to
+ *     pull that payload and return 0.
  */
 int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 {
 	struct svc_xprt *xprt = rqstp->rq_xprt;
 	struct svcxprt_rdma *rdma_xprt =
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
-	struct svc_rdma_op_ctxt *ctxt = NULL;
-	struct rpcrdma_msg *rmsgp;
-	int ret = 0;
-
-	dprintk("svcrdma: rqstp=%p\n", rqstp);
+	struct svc_rdma_op_ctxt *ctxt;
+	__be32 *p;
+	int ret;
 
 	spin_lock(&rdma_xprt->sc_rq_dto_lock);
 	if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
@@ -671,22 +527,14 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 					struct svc_rdma_op_ctxt, list);
 		list_del(&ctxt->list);
 	} else {
-		atomic_inc(&rdma_stat_rq_starve);
+		/* No new incoming requests, terminate the loop */
 		clear_bit(XPT_DATA, &xprt->xpt_flags);
-		ctxt = NULL;
+		spin_unlock(&rdma_xprt->sc_rq_dto_lock);
+		return 0;
 	}
 	spin_unlock(&rdma_xprt->sc_rq_dto_lock);
-	if (!ctxt) {
-		/* This is the EAGAIN path. The svc_recv routine will
-		 * return -EAGAIN, the nfsd thread will go to call into
-		 * svc_recv again and we shouldn't be on the active
-		 * transport list
-		 */
-		if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
-			goto defer;
-		goto out;
-	}
-	dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p\n",
+
+	dprintk("svcrdma: recvfrom: ctxt=%p on xprt=%p, rqstp=%p\n",
 		ctxt, rdma_xprt, rqstp);
 	atomic_inc(&rdma_stat_recv);
 
@@ -694,7 +542,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
 
 	/* Decode the RDMA header. */
-	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
+	p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
 	ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg);
 	if (ret < 0)
 		goto out_err;
@@ -702,9 +550,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 		goto out_drop;
 	rqstp->rq_xprt_hlen = ret;
 
-	if (svc_rdma_is_backchannel_reply(xprt, &rmsgp->rm_xid)) {
-		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt,
-					       &rmsgp->rm_xid,
+	if (svc_rdma_is_backchannel_reply(xprt, p)) {
+		ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p,
 					       &rqstp->rq_arg);
 		svc_rdma_put_context(ctxt, 0);
 		if (ret)
@@ -712,39 +559,34 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 		return ret;
 	}
 
-	/* Read read-list data. */
-	ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
-	if (ret > 0) {
-		/* read-list posted, defer until data received from client. */
-		goto defer;
-	} else if (ret < 0) {
-		/* Post of read-list failed, free context. */
-		svc_rdma_put_context(ctxt, 1);
-		return 0;
-	}
+	p += rpcrdma_fixed_maxsz;
+	if (*p != xdr_zero)
+		goto out_readchunk;
 
 complete:
-	ret = rqstp->rq_arg.head[0].iov_len
-		+ rqstp->rq_arg.page_len
-		+ rqstp->rq_arg.tail[0].iov_len;
 	svc_rdma_put_context(ctxt, 0);
- out:
-	dprintk("svcrdma: ret=%d, rq_arg.len=%u, "
-		"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zd\n",
-		ret, rqstp->rq_arg.len,
-		rqstp->rq_arg.head[0].iov_base,
-		rqstp->rq_arg.head[0].iov_len);
+	dprintk("svcrdma: recvfrom: xprt=%p, rqstp=%p, rq_arg.len=%u\n",
+		rdma_xprt, rqstp, rqstp->rq_arg.len);
 	rqstp->rq_prot = IPPROTO_MAX;
 	svc_xprt_copy_addrs(rqstp, xprt);
-	return ret;
+	return rqstp->rq_arg.len;
+
+out_readchunk:
+	ret = svc_rdma_recv_read_chunk(rdma_xprt, rqstp, ctxt, p);
+	if (ret < 0)
+		goto out_postfail;
+	return 0;
 
 out_err:
-	svc_rdma_send_error(rdma_xprt, &rmsgp->rm_xid, ret);
+	svc_rdma_send_error(rdma_xprt, p, ret);
 	svc_rdma_put_context(ctxt, 0);
 	return 0;
 
-defer:
-	return 0;
+out_postfail:
+	if (ret == -EINVAL)	/* rewind p to the XID for the error reply */
+		svc_rdma_send_error(rdma_xprt, p - rpcrdma_fixed_maxsz, ret);
+	svc_rdma_put_context(ctxt, 1);
+	return ret;
 
 out_drop:
 	svc_rdma_put_context(ctxt, 1);

diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 0cf6202..933f79b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c

@@ -12,6 +12,9 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
+static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc);
+static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc);
+
 /* Each R/W context contains state for one chain of RDMA Read or
  * Write Work Requests.
  *
@@ -113,22 +116,20 @@ struct svc_rdma_chunk_ctxt {
 	struct svcxprt_rdma	*cc_rdma;
 	struct list_head	cc_rwctxts;
 	int			cc_sqecount;
-	enum dma_data_direction cc_dir;
 };
 
 static void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
-			     struct svc_rdma_chunk_ctxt *cc,
-			     enum dma_data_direction dir)
+			     struct svc_rdma_chunk_ctxt *cc)
 {
 	cc->cc_rdma = rdma;
 	svc_xprt_get(&rdma->sc_xprt);
 
 	INIT_LIST_HEAD(&cc->cc_rwctxts);
 	cc->cc_sqecount = 0;
-	cc->cc_dir = dir;
 }
 
-static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc)
+static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
+				enum dma_data_direction dir)
 {
 	struct svcxprt_rdma *rdma = cc->cc_rdma;
 	struct svc_rdma_rw_ctxt *ctxt;
@@ -138,7 +139,7 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc)
 
 		rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
 				    rdma->sc_port_num, ctxt->rw_sg_table.sgl,
-				    ctxt->rw_nents, cc->cc_dir);
+				    ctxt->rw_nents, dir);
 		svc_rdma_put_rw_ctxt(rdma, ctxt);
 	}
 	svc_xprt_put(&rdma->sc_xprt);
@@ -176,13 +177,14 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, __be32 *chunk)
 	info->wi_seg_no = 0;
 	info->wi_nsegs = be32_to_cpup(++chunk);
 	info->wi_segs = ++chunk;
-	svc_rdma_cc_init(rdma, &info->wi_cc, DMA_TO_DEVICE);
+	svc_rdma_cc_init(rdma, &info->wi_cc);
+	info->wi_cc.cc_cqe.done = svc_rdma_write_done;
 	return info;
 }
 
 static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
 {
-	svc_rdma_cc_release(&info->wi_cc);
+	svc_rdma_cc_release(&info->wi_cc, DMA_TO_DEVICE);
 	kfree(info);
 }
 
@@ -216,6 +218,76 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
 	svc_rdma_write_info_free(info);
 }
 
+/* State for pulling a Read chunk; one per svc_rdma_recv_read_chunk() call.
+ */
+struct svc_rdma_read_info {
+	struct svc_rdma_op_ctxt		*ri_readctxt;	/* Receive ctxt */
+	unsigned int			ri_position;	/* chunk Position */
+	unsigned int			ri_pageno;	/* rq_pages index */
+	unsigned int			ri_pageoff;	/* offset in page */
+	unsigned int			ri_chunklen;	/* bytes in chunk */
+
+	struct svc_rdma_chunk_ctxt	ri_cc;		/* R/W ctxt chain */
+};
+
+static struct svc_rdma_read_info *
+svc_rdma_read_info_alloc(struct svcxprt_rdma *rdma)
+{
+	struct svc_rdma_read_info *info;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return info;	/* NULL on allocation failure */
+
+	svc_rdma_cc_init(rdma, &info->ri_cc);
+	info->ri_cc.cc_cqe.done = svc_rdma_wc_read_done;
+	return info;	/* caller fills in the remaining ri_ fields */
+}
+
+static void svc_rdma_read_info_free(struct svc_rdma_read_info *info)
+{
+	svc_rdma_cc_release(&info->ri_cc, DMA_FROM_DEVICE);
+	kfree(info);	/* info->ri_readctxt is not released here */
+}
+
+/**
+ * svc_rdma_wc_read_done - Handle completion of an RDMA Read ctx
+ * @cq: controlling Completion Queue
+ * @wc: Work Completion
+ * Note: on success the Receive ctxt is queued on sc_read_complete_q.
+ */
+static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_cqe *cqe = wc->wr_cqe;
+	struct svc_rdma_chunk_ctxt *cc =
+			container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
+	struct svcxprt_rdma *rdma = cc->cc_rdma;
+	struct svc_rdma_read_info *info =
+			container_of(cc, struct svc_rdma_read_info, ri_cc);
+
+	atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
+	wake_up(&rdma->sc_send_wait);	/* SQ entries were just returned */
+
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			pr_err("svcrdma: read ctx: %s (%u/0x%x)\n",
+			       ib_wc_status_msg(wc->status),
+			       wc->status, wc->vendor_err);
+		svc_rdma_put_context(info->ri_readctxt, 1);
+	} else {
+		spin_lock(&rdma->sc_rq_dto_lock);
+		list_add_tail(&info->ri_readctxt->list,
+			      &rdma->sc_read_complete_q);
+		spin_unlock(&rdma->sc_rq_dto_lock);
+
+		set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
+		svc_xprt_enqueue(&rdma->sc_xprt);
+	}
+
+	svc_rdma_read_info_free(info);	/* both outcomes free the read info */
+}
+
 /* This function sleeps when the transport's Send Queue is congested.
  *
  * Assumptions:
@@ -232,6 +304,9 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
 	struct ib_cqe *cqe;
 	int ret;
 
+	if (cc->cc_sqecount > rdma->sc_sq_depth)
+		return -EINVAL;
+
 	first_wr = NULL;
 	cqe = &cc->cc_cqe;
 	list_for_each(tmp, &cc->cc_rwctxts) {
@@ -295,8 +370,9 @@ static void svc_rdma_pagelist_to_sg(struct svc_rdma_write_info *info,
 	struct scatterlist *sg;
 	struct page **page;
 
-	page_off = (info->wi_next_off + xdr->page_base) & ~PAGE_MASK;
-	page_no = (info->wi_next_off + xdr->page_base) >> PAGE_SHIFT;
+	page_off = info->wi_next_off + xdr->page_base;
+	page_no = page_off >> PAGE_SHIFT;
+	page_off = offset_in_page(page_off);
 	page = xdr->pages + page_no;
 	info->wi_next_off += remaining;
 	sg = ctxt->rw_sg_table.sgl;
@@ -332,7 +408,6 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
 	__be32 *seg;
 	int ret;
 
-	cc->cc_cqe.done = svc_rdma_write_done;
 	seg = info->wi_segs + info->wi_seg_no * rpcrdma_segment_maxsz;
 	do {
 		unsigned int write_len;
@@ -425,6 +500,7 @@ static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info,
  *
  * Returns a non-negative number of bytes the chunk consumed, or
  *	%-E2BIG if the payload was larger than the Write chunk,
+ *	%-EINVAL if client provided too many segments,
  *	%-ENOMEM if rdma_rw context pool was exhausted,
  *	%-ENOTCONN if posting failed (connection is lost),
  *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
@@ -465,6 +541,7 @@ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch,
  *
  * Returns a non-negative number of bytes the chunk consumed, or
  *	%-E2BIG if the payload was larger than the Reply chunk,
+ *	%-EINVAL if client provided too many segments,
  *	%-ENOMEM if rdma_rw context pool was exhausted,
  *	%-ENOTCONN if posting failed (connection is lost),
  *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
@@ -510,3 +587,353 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch,
 	svc_rdma_write_info_free(info);
 	return ret;
 }
+
+static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
+				       struct svc_rqst *rqstp,
+				       u32 rkey, u32 len, u64 offset)
+{
+	struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+	struct svc_rdma_chunk_ctxt *cc = &info->ri_cc;
+	struct svc_rdma_rw_ctxt *ctxt;
+	unsigned int sge_no, seg_len;
+	struct scatterlist *sg;
+	int ret;
+
+	sge_no = PAGE_ALIGN(info->ri_pageoff + len) >> PAGE_SHIFT;
+	ctxt = svc_rdma_get_rw_ctxt(cc->cc_rdma, sge_no);
+	if (!ctxt)
+		goto out_noctx;
+	ctxt->rw_nents = sge_no;
+	dprintk("svcrdma: reading segment %u@0x%016llx:0x%08x (%u sges)\n",
+		len, offset, rkey, sge_no);
+
+	sg = ctxt->rw_sg_table.sgl;
+	for (sge_no = 0; sge_no < ctxt->rw_nents; sge_no++) {
+		seg_len = min_t(unsigned int, len,
+				PAGE_SIZE - info->ri_pageoff);
+
+		head->arg.pages[info->ri_pageno] =
+			rqstp->rq_pages[info->ri_pageno];
+		if (!info->ri_pageoff)
+			head->count++;
+
+		sg_set_page(sg, rqstp->rq_pages[info->ri_pageno],
+			    seg_len, info->ri_pageoff);
+		sg = sg_next(sg);
+
+		info->ri_pageoff += seg_len;
+		if (info->ri_pageoff == PAGE_SIZE) {
+			info->ri_pageno++;
+			info->ri_pageoff = 0;
+		}
+		len -= seg_len;
+
+		/* Safety check: more data remains but rq_pages is used up */
+		if (len &&
+		    &rqstp->rq_pages[info->ri_pageno + 1] > rqstp->rq_page_end)
+			goto out_overrun;
+	}
+
+	ret = rdma_rw_ctx_init(&ctxt->rw_ctx, cc->cc_rdma->sc_qp,
+			       cc->cc_rdma->sc_port_num,
+			       ctxt->rw_sg_table.sgl, ctxt->rw_nents,
+			       0, offset, rkey, DMA_FROM_DEVICE);
+	if (ret < 0)
+		goto out_initerr;
+
+	list_add(&ctxt->rw_list, &cc->cc_rwctxts);
+	cc->cc_sqecount += ret;
+	return 0;
+
+out_noctx:
+	dprintk("svcrdma: no R/W ctxs available\n");
+	return -ENOMEM;
+
+out_overrun:
+	svc_rdma_put_rw_ctxt(cc->cc_rdma, ctxt);	/* not yet on cc_rwctxts */
+	dprintk("svcrdma: request overruns rq_pages\n");
+	return -EINVAL;
+
+out_initerr:
+	svc_rdma_put_rw_ctxt(cc->cc_rdma, ctxt);
+	pr_err("svcrdma: failed to map pagelist (%d)\n", ret);
+	return -EIO;
+}
+
+static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
+				     struct svc_rdma_read_info *info,
+				     __be32 *p)
+{
+	int ret = -EINVAL;	/* returned if no segment gets processed */
+
+	info->ri_chunklen = 0;
+	while (*p++ != xdr_zero) {
+		u32 rs_handle, rs_length;
+		u64 rs_offset;
+
+		if (be32_to_cpup(p++) != info->ri_position)
+			break;
+		rs_handle = be32_to_cpup(p++);
+		rs_length = be32_to_cpup(p++);
+		p = xdr_decode_hyper(p, &rs_offset);
+
+		ret = svc_rdma_build_read_segment(info, rqstp,
+						  rs_handle, rs_length,
+						  rs_offset);
+		if (ret < 0)
+			break;
+
+		info->ri_chunklen += rs_length;
+	}
+
+	return ret;
+}
+
+/* If there is inline content following the Read chunk, append it to
+ * the page list immediately following the data payload. This has to
+ * be done after the reader function has determined how many pages
+ * were consumed for RDMA Read.
+ *
+ * On entry, ri_pageno and ri_pageoff point directly to the end of the
+ * page list. On exit, both have been updated to the new "next byte".
+ * Returns 0, or -EINVAL if the head iovec is larger than PAGE_SIZE.
+ * Assumptions:
+ *	- Inline content fits entirely in rq_pages[0]
+ *	- Trailing content is only a handful of bytes
+ */
+static int svc_rdma_copy_tail(struct svc_rqst *rqstp,
+			      struct svc_rdma_read_info *info)
+{
+	struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+	unsigned int tail_length, remaining;
+	u8 *srcp, *destp;
+
+	/* Assert that all inline content fits in page 0. This is an
+	 * implementation limit, not a protocol limit.
+	 */
+	if (head->arg.head[0].iov_len > PAGE_SIZE) {
+		pr_warn_once("svcrdma: too much trailing inline content\n");
+		return -EINVAL;
+	}
+
+	srcp = head->arg.head[0].iov_base;
+	srcp += info->ri_position;
+	tail_length = head->arg.head[0].iov_len - info->ri_position;
+	remaining = tail_length;
+
+	/* If there is room on the last page in the page list, try to
+	 * fit the trailing content there.
+	 */
+	if (info->ri_pageoff > 0) {
+		unsigned int len;
+
+		len = min_t(unsigned int, remaining,
+			    PAGE_SIZE - info->ri_pageoff);
+		destp = page_address(rqstp->rq_pages[info->ri_pageno]);
+		destp += info->ri_pageoff;
+
+		memcpy(destp, srcp, len);
+		srcp += len;
+		destp += len;
+		info->ri_pageoff += len;
+		remaining -= len;
+
+		if (info->ri_pageoff == PAGE_SIZE) {
+			info->ri_pageno++;
+			info->ri_pageoff = 0;
+		}
+	}
+
+	/* Whatever did not fit goes at the start of a fresh page. */
+	if (remaining) {
+		head->arg.pages[info->ri_pageno] =
+				rqstp->rq_pages[info->ri_pageno];
+		head->count++;
+
+		destp = page_address(rqstp->rq_pages[info->ri_pageno]);
+		memcpy(destp, srcp, remaining);
+		info->ri_pageoff += remaining;
+	}
+
+	head->arg.page_len += tail_length;
+	head->arg.len += tail_length;
+	head->arg.buflen += tail_length;
+	return 0;
+}
+
+/* Construct RDMA Reads to pull over a normal Read chunk. The chunk
+ * data lands in the page list of head->arg.pages. Returns 0 or a
+ * negative errno from the chunk builder or the tail copy.
+ *
+ * Currently NFSD does not look at the head->arg.tail[0] iovec, so XDR
+ * round-up and trailing inline content both go at the end of the pagelist.
+ */
+static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
+					    struct svc_rdma_read_info *info,
+					    __be32 *p)
+{
+	struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+	int ret;
+
+	dprintk("svcrdma: Reading Read chunk at position %u\n",
+		info->ri_position);
+
+	info->ri_pageno = head->hdr_count;
+	info->ri_pageoff = 0;
+
+	ret = svc_rdma_build_read_chunk(rqstp, info, p);
+	if (ret < 0)
+		goto out;
+
+	/* Read chunk may need XDR round-up (see RFC 5666, s. 3.7): pad the
+	 * length to a multiple of 4 and step the sink cursor over the pad. */
+	if (info->ri_chunklen & 3) {
+		u32 padlen = 4 - (info->ri_chunklen & 3);
+
+		info->ri_chunklen += padlen;
+
+		/* NB: data payload always starts on XDR alignment,
+		 * thus the pad can never contain a page boundary.
+		 */
+		info->ri_pageoff += padlen;
+		if (info->ri_pageoff == PAGE_SIZE) {
+			info->ri_pageno++;
+			info->ri_pageoff = 0;
+		}
+	}
+
+	head->arg.page_len = info->ri_chunklen;
+	head->arg.len += info->ri_chunklen;
+	head->arg.buflen += info->ri_chunklen;
+
+	if (info->ri_position < head->arg.head[0].iov_len) {
+		ret = svc_rdma_copy_tail(rqstp, info);
+		if (ret < 0)
+			goto out;
+	}
+	head->arg.head[0].iov_len = info->ri_position;
+
+out:
+	return ret;
+}
+
+/* Construct RDMA Reads to pull over a Position Zero Read chunk.
+ * The start of the data lands in the first page just after the
+ * Transport header, and the rest lands in the page list of
+ * head->arg.pages. Returns 0 or a negative errno.
+ *
+ * Assumptions:
+ *	- A PZRC has an XDR-aligned length (no implicit round-up).
+ *	- There can be no trailing inline content (IOW, we assume
+ *	  a PZRC is never sent in an RDMA_MSG message, though it's
+ *	  allowed by spec).
+ */
+static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp,
+					struct svc_rdma_read_info *info,
+					__be32 *p)
+{
+	struct svc_rdma_op_ctxt *head = info->ri_readctxt;
+	int ret;
+
+	dprintk("svcrdma: Reading Position Zero Read chunk\n");
+
+	info->ri_pageno = head->hdr_count - 1;
+	info->ri_pageoff = offset_in_page(head->byte_len);
+
+	ret = svc_rdma_build_read_chunk(rqstp, info, p);
+	if (ret < 0)
+		goto out;
+
+	head->arg.len += info->ri_chunklen;
+	head->arg.buflen += info->ri_chunklen;
+
+	if (head->arg.buflen <= head->sge[0].length) {
+		/* Transport header and RPC message fit entirely
+		 * in page where head iovec resides.
+		 */
+		head->arg.head[0].iov_len = info->ri_chunklen;
+	} else {
+		/* Transport header and part of RPC message reside
+		 * in the head iovec's page.
+		 */
+		head->arg.head[0].iov_len =
+				head->sge[0].length - head->byte_len;
+		head->arg.page_len =
+				info->ri_chunklen - head->arg.head[0].iov_len;
+	}
+
+out:
+	return ret;
+}
+
+/**
+ * svc_rdma_recv_read_chunk - Pull a Read chunk from the client
+ * @rdma: controlling RDMA transport
+ * @rqstp: set of pages to use as Read sink buffers
+ * @head: Receive ctxt; pages under I/O collect in head->arg
+ * @p: pointer to the first item of the Read list (its discriminator word)
+ *
+ * Returns:
+ *	%0 if all needed RDMA Reads were posted successfully,
+ *	%-EINVAL if too many segments, or the chunk does not fit in rq_pages,
+ *	%-ENOMEM if the read info or an rdma_rw context could not be allocated,
+ *	%-ENOTCONN if posting failed (connection is lost),
+ *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
+ *
+ * Assumptions:
+ * - All Read segments in @p have the same Position value.
+ */
+int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
+			     struct svc_rdma_op_ctxt *head, __be32 *p)
+{
+	struct svc_rdma_read_info *info;
+	struct page **page;
+	int ret;
+
+	/* The request (with page list) is constructed in
+	 * head->arg. Pages involved with RDMA Read I/O are
+	 * transferred there.
+	 */
+	head->hdr_count = head->count;
+	head->arg.head[0] = rqstp->rq_arg.head[0];
+	head->arg.tail[0] = rqstp->rq_arg.tail[0];
+	head->arg.pages = head->pages;
+	head->arg.page_base = 0;
+	head->arg.page_len = 0;
+	head->arg.len = rqstp->rq_arg.len;
+	head->arg.buflen = rqstp->rq_arg.buflen;
+
+	info = svc_rdma_read_info_alloc(rdma);
+	if (!info)
+		return -ENOMEM;
+	info->ri_readctxt = head;
+
+	info->ri_position = be32_to_cpup(p + 1);
+	if (info->ri_position)
+		ret = svc_rdma_build_normal_read_chunk(rqstp, info, p);
+	else
+		ret = svc_rdma_build_pz_read_chunk(rqstp, info, p);
+
+	/* Mark the start of the pages that can be used for the reply */
+	if (info->ri_pageoff > 0)
+		info->ri_pageno++;
+	rqstp->rq_respages = &rqstp->rq_pages[info->ri_pageno];
+	rqstp->rq_next_page = rqstp->rq_respages + 1;
+
+	if (ret < 0)
+		goto out;
+
+	ret = svc_rdma_post_chunk_ctxt(&info->ri_cc);
+
+out:
+	/* Read sink pages have been moved from rqstp->rq_pages to
+	 * head->arg.pages. Force svc_recv to refill those slots
+	 * in rq_pages.
+	 */
+	for (page = rqstp->rq_pages; page < rqstp->rq_respages; page++)
+		*page = NULL;
+
+	if (ret < 0)
+		svc_rdma_read_info_free(info);
+	return ret;
+}

diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 1736337..7c3a211 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c

@@ -313,13 +313,17 @@ static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
 	dma_addr = ib_dma_map_page(dev, virt_to_page(base),
 				   offset, len, DMA_TO_DEVICE);
 	if (ib_dma_mapping_error(dev, dma_addr))
-		return -EIO;
+		goto out_maperr;
 
 	ctxt->sge[sge_no].addr = dma_addr;
 	ctxt->sge[sge_no].length = len;
 	ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
 	svc_rdma_count_mappings(rdma, ctxt);
 	return 0;
+
+out_maperr:
+	pr_err("svcrdma: failed to map buffer\n");
+	return -EIO;
 }
 
 static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
@@ -334,13 +338,17 @@ static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
 
 	dma_addr = ib_dma_map_page(dev, page, offset, len, DMA_TO_DEVICE);
 	if (ib_dma_mapping_error(dev, dma_addr))
-		return -EIO;
+		goto out_maperr;
 
 	ctxt->sge[sge_no].addr = dma_addr;
 	ctxt->sge[sge_no].length = len;
 	ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
 	svc_rdma_count_mappings(rdma, ctxt);
 	return 0;
+
+out_maperr:
+	pr_err("svcrdma: failed to map page\n");
+	return -EIO;
 }
 
 /**
@@ -547,7 +555,6 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
 	return 0;
 
 err:
-	pr_err("svcrdma: failed to post Send WR (%d)\n", ret);
 	svc_rdma_unmap_dma(ctxt);
 	svc_rdma_put_context(ctxt, 1);
 	return ret;
@@ -677,7 +684,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 	return 0;
 
  err2:
-	if (ret != -E2BIG)
+	if (ret != -E2BIG && ret != -EINVAL)
 		goto err1;
 
 	ret = svc_rdma_post_recv(rdma, GFP_KERNEL);

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index a9d9cb1..e660d49 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c

@@ -202,7 +202,6 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 out:
 	ctxt->count = 0;
 	ctxt->mapped_sges = 0;
-	ctxt->frmr = NULL;
 	return ctxt;
 
 out_empty:
@@ -226,22 +225,13 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
 {
 	struct svcxprt_rdma *xprt = ctxt->xprt;
 	struct ib_device *device = xprt->sc_cm_id->device;
-	u32 lkey = xprt->sc_pd->local_dma_lkey;
 	unsigned int i;
 
-	for (i = 0; i < ctxt->mapped_sges; i++) {
-		/*
-		 * Unmap the DMA addr in the SGE if the lkey matches
-		 * the local_dma_lkey, otherwise, ignore it since it is
-		 * an FRMR lkey and will be unmapped later when the
-		 * last WR that uses it completes.
-		 */
-		if (ctxt->sge[i].lkey == lkey)
-			ib_dma_unmap_page(device,
-					    ctxt->sge[i].addr,
-					    ctxt->sge[i].length,
-					    ctxt->direction);
-	}
+	for (i = 0; i < ctxt->mapped_sges; i++)
+		ib_dma_unmap_page(device,
+				  ctxt->sge[i].addr,
+				  ctxt->sge[i].length,
+				  ctxt->direction);
 	ctxt->mapped_sges = 0;
 }
 
@@ -346,36 +336,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 	svc_xprt_put(&xprt->sc_xprt);
 }
 
-static void svc_rdma_send_wc_common(struct svcxprt_rdma *xprt,
-				    struct ib_wc *wc,
-				    const char *opname)
-{
-	if (wc->status != IB_WC_SUCCESS)
-		goto err;
-
-out:
-	atomic_inc(&xprt->sc_sq_avail);
-	wake_up(&xprt->sc_send_wait);
-	return;
-
-err:
-	set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-	if (wc->status != IB_WC_WR_FLUSH_ERR)
-		pr_err("svcrdma: %s: %s (%u/0x%x)\n",
-		       opname, ib_wc_status_msg(wc->status),
-		       wc->status, wc->vendor_err);
-	goto out;
-}
-
-static void svc_rdma_send_wc_common_put(struct ib_cq *cq, struct ib_wc *wc,
-					const char *opname)
-{
-	struct svcxprt_rdma *xprt = cq->cq_context;
-
-	svc_rdma_send_wc_common(xprt, wc, opname);
-	svc_xprt_put(&xprt->sc_xprt);
-}
-
 /**
  * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC
  * @cq:        completion queue
@@ -384,73 +344,28 @@ static void svc_rdma_send_wc_common_put(struct ib_cq *cq, struct ib_wc *wc,
  */
 void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
 {
-	struct ib_cqe *cqe = wc->wr_cqe;
-	struct svc_rdma_op_ctxt *ctxt;
-
-	svc_rdma_send_wc_common_put(cq, wc, "send");
-
-	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
-	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_context(ctxt, 1);
-}
-
-/**
- * svc_rdma_wc_reg - Invoked by RDMA provider for each polled FASTREG WC
- * @cq:        completion queue
- * @wc:        completed WR
- *
- */
-void svc_rdma_wc_reg(struct ib_cq *cq, struct ib_wc *wc)
-{
-	svc_rdma_send_wc_common_put(cq, wc, "fastreg");
-}
-
-/**
- * svc_rdma_wc_read - Invoked by RDMA provider for each polled Read WC
- * @cq:        completion queue
- * @wc:        completed WR
- *
- */
-void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc)
-{
 	struct svcxprt_rdma *xprt = cq->cq_context;
 	struct ib_cqe *cqe = wc->wr_cqe;
 	struct svc_rdma_op_ctxt *ctxt;
 
-	svc_rdma_send_wc_common(xprt, wc, "read");
+	atomic_inc(&xprt->sc_sq_avail);
+	wake_up(&xprt->sc_send_wait);
 
 	ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
 	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_frmr(xprt, ctxt->frmr);
+	svc_rdma_put_context(ctxt, 1);
 
-	if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
-		struct svc_rdma_op_ctxt *read_hdr;
-
-		read_hdr = ctxt->read_hdr;
-		spin_lock(&xprt->sc_rq_dto_lock);
-		list_add_tail(&read_hdr->list,
-			      &xprt->sc_read_complete_q);
-		spin_unlock(&xprt->sc_rq_dto_lock);
-
-		set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-		svc_xprt_enqueue(&xprt->sc_xprt);
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			pr_err("svcrdma: Send: %s (%u/0x%x)\n",
+			       ib_wc_status_msg(wc->status),
+			       wc->status, wc->vendor_err);
 	}
 
-	svc_rdma_put_context(ctxt, 0);
 	svc_xprt_put(&xprt->sc_xprt);
 }
 
-/**
- * svc_rdma_wc_inv - Invoked by RDMA provider for each polled LOCAL_INV WC
- * @cq:        completion queue
- * @wc:        completed WR
- *
- */
-void svc_rdma_wc_inv(struct ib_cq *cq, struct ib_wc *wc)
-{
-	svc_rdma_send_wc_common_put(cq, wc, "localInv");
-}
-
 static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 					     int listener)
 {
@@ -462,14 +377,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 	INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
-	INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
 	INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
 	init_waitqueue_head(&cma_xprt->sc_send_wait);
 
 	spin_lock_init(&cma_xprt->sc_lock);
 	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
-	spin_lock_init(&cma_xprt->sc_frmr_q_lock);
 	spin_lock_init(&cma_xprt->sc_ctxt_lock);
 	spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
 
@@ -780,86 +693,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 	return ERR_PTR(ret);
 }
 
-static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
-{
-	struct ib_mr *mr;
-	struct scatterlist *sg;
-	struct svc_rdma_fastreg_mr *frmr;
-	u32 num_sg;
-
-	frmr = kmalloc(sizeof(*frmr), GFP_KERNEL);
-	if (!frmr)
-		goto err;
-
-	num_sg = min_t(u32, RPCSVC_MAXPAGES, xprt->sc_frmr_pg_list_len);
-	mr = ib_alloc_mr(xprt->sc_pd, IB_MR_TYPE_MEM_REG, num_sg);
-	if (IS_ERR(mr))
-		goto err_free_frmr;
-
-	sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL);
-	if (!sg)
-		goto err_free_mr;
-
-	sg_init_table(sg, RPCSVC_MAXPAGES);
-
-	frmr->mr = mr;
-	frmr->sg = sg;
-	INIT_LIST_HEAD(&frmr->frmr_list);
-	return frmr;
-
- err_free_mr:
-	ib_dereg_mr(mr);
- err_free_frmr:
-	kfree(frmr);
- err:
-	return ERR_PTR(-ENOMEM);
-}
-
-static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
-{
-	struct svc_rdma_fastreg_mr *frmr;
-
-	while (!list_empty(&xprt->sc_frmr_q)) {
-		frmr = list_entry(xprt->sc_frmr_q.next,
-				  struct svc_rdma_fastreg_mr, frmr_list);
-		list_del_init(&frmr->frmr_list);
-		kfree(frmr->sg);
-		ib_dereg_mr(frmr->mr);
-		kfree(frmr);
-	}
-}
-
-struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
-{
-	struct svc_rdma_fastreg_mr *frmr = NULL;
-
-	spin_lock(&rdma->sc_frmr_q_lock);
-	if (!list_empty(&rdma->sc_frmr_q)) {
-		frmr = list_entry(rdma->sc_frmr_q.next,
-				  struct svc_rdma_fastreg_mr, frmr_list);
-		list_del_init(&frmr->frmr_list);
-		frmr->sg_nents = 0;
-	}
-	spin_unlock(&rdma->sc_frmr_q_lock);
-	if (frmr)
-		return frmr;
-
-	return rdma_alloc_frmr(rdma);
-}
-
-void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
-		       struct svc_rdma_fastreg_mr *frmr)
-{
-	if (frmr) {
-		ib_dma_unmap_sg(rdma->sc_cm_id->device,
-				frmr->sg, frmr->sg_nents, frmr->direction);
-		spin_lock(&rdma->sc_frmr_q_lock);
-		WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
-		list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
-		spin_unlock(&rdma->sc_frmr_q_lock);
-	}
-}
-
 /*
  * This is the xpo_recvfrom function for listening endpoints. Its
  * purpose is to accept incoming connections. The CMA callback handler
@@ -908,8 +741,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	 * capabilities of this particular device */
 	newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
 				  (size_t)RPCSVC_MAXPAGES);
-	newxprt->sc_max_sge_rd = min_t(size_t, dev->attrs.max_sge_rd,
-				       RPCSVC_MAXPAGES);
 	newxprt->sc_max_req_size = svcrdma_max_req_size;
 	newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
 					 svcrdma_max_requests);
@@ -952,7 +783,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	memset(&qp_attr, 0, sizeof qp_attr);
 	qp_attr.event_handler = qp_event_handler;
 	qp_attr.qp_context = &newxprt->sc_xprt;
-	qp_attr.port_num = newxprt->sc_cm_id->port_num;
+	qp_attr.port_num = newxprt->sc_port_num;
 	qp_attr.cap.max_rdma_ctxs = newxprt->sc_max_requests;
 	qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
 	qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
@@ -976,47 +807,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	}
 	newxprt->sc_qp = newxprt->sc_cm_id->qp;
 
-	/*
-	 * Use the most secure set of MR resources based on the
-	 * transport type and available memory management features in
-	 * the device. Here's the table implemented below:
-	 *
-	 *		Fast	Global	DMA	Remote WR
-	 *		Reg	LKEY	MR	Access
-	 *		Sup'd	Sup'd	Needed	Needed
-	 *
-	 * IWARP	N	N	Y	Y
-	 *		N	Y	Y	Y
-	 *		Y	N	Y	N
-	 *		Y	Y	N	-
-	 *
-	 * IB		N	N	Y	N
-	 *		N	Y	N	-
-	 *		Y	N	Y	N
-	 *		Y	Y	N	-
-	 *
-	 * NB:	iWARP requires remote write access for the data sink
-	 *	of an RDMA_READ. IB does not.
-	 */
-	newxprt->sc_reader = rdma_read_chunk_lcl;
-	if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
-		newxprt->sc_frmr_pg_list_len =
-			dev->attrs.max_fast_reg_page_list_len;
-		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
-		newxprt->sc_reader = rdma_read_chunk_frmr;
-	} else
+	if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
 		newxprt->sc_snd_w_inv = false;
-
-	/*
-	 * Determine if a DMA MR is required and if so, what privs are required
-	 */
-	if (!rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num) &&
-	    !rdma_ib_or_roce(dev, newxprt->sc_cm_id->port_num))
+	if (!rdma_protocol_iwarp(dev, newxprt->sc_port_num) &&
+	    !rdma_ib_or_roce(dev, newxprt->sc_port_num))
 		goto errout;
 
-	if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num))
-		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
-
 	/* Post receive buffers */
 	for (i = 0; i < newxprt->sc_max_requests; i++) {
 		ret = svc_rdma_post_recv(newxprt, GFP_KERNEL);
@@ -1056,7 +852,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
 	dprintk("    remote address  : %pIS:%u\n", sap, rpc_get_port(sap));
 	dprintk("    max_sge         : %d\n", newxprt->sc_max_sge);
-	dprintk("    max_sge_rd      : %d\n", newxprt->sc_max_sge_rd);
 	dprintk("    sq_depth        : %d\n", newxprt->sc_sq_depth);
 	dprintk("    max_requests    : %d\n", newxprt->sc_max_requests);
 	dprintk("    ord             : %d\n", newxprt->sc_ord);
@@ -1117,12 +912,6 @@ static void __svc_rdma_free(struct work_struct *work)
 		pr_err("svcrdma: sc_xprt still in use? (%d)\n",
 		       kref_read(&xprt->xpt_ref));
 
-	/*
-	 * Destroy queued, but not processed read completions. Note
-	 * that this cleanup has to be done before destroying the
-	 * cm_id because the device ptr is needed to unmap the dma in
-	 * svc_rdma_put_context.
-	 */
 	while (!list_empty(&rdma->sc_read_complete_q)) {
 		struct svc_rdma_op_ctxt *ctxt;
 		ctxt = list_first_entry(&rdma->sc_read_complete_q,
@@ -1130,8 +919,6 @@ static void __svc_rdma_free(struct work_struct *work)
 		list_del(&ctxt->list);
 		svc_rdma_put_context(ctxt, 1);
 	}
-
-	/* Destroy queued, but not processed recv completions */
 	while (!list_empty(&rdma->sc_rq_dto_q)) {
 		struct svc_rdma_op_ctxt *ctxt;
 		ctxt = list_first_entry(&rdma->sc_rq_dto_q,
@@ -1151,7 +938,6 @@ static void __svc_rdma_free(struct work_struct *work)
 		xprt->xpt_bc_xprt = NULL;
 	}
 
-	rdma_dealloc_frmr_q(rdma);
 	svc_rdma_destroy_rw_ctxts(rdma);
 	svc_rdma_destroy_ctxts(rdma);
 

diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 62ecbcc..d1c458e 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c

@@ -684,7 +684,8 @@ xprt_rdma_free(struct rpc_task *task)
 
 	dprintk("RPC:       %s: called on 0x%p\n", __func__, req->rl_reply);
 
-	if (unlikely(!list_empty(&req->rl_registered)))
+	rpcrdma_remove_req(&r_xprt->rx_buf, req);
+	if (!list_empty(&req->rl_registered))
 		ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task));
 	rpcrdma_unmap_sges(ia, req);
 	rpcrdma_buffer_put(req);

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3dbce9a..e4171f2 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c

@@ -243,8 +243,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 	struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
 #endif
-	struct ib_qp_attr *attr = &ia->ri_qp_attr;
-	struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
 	int connstate = 0;
 
 	switch (event->event) {
@@ -267,7 +265,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 		break;
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-		pr_info("rpcrdma: removing device for %pIS:%u\n",
+		pr_info("rpcrdma: removing device %s for %pIS:%u\n",
+			ia->ri_device->name,
 			sap, rpc_get_port(sap));
 #endif
 		set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
@@ -282,13 +281,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 		return 1;
 	case RDMA_CM_EVENT_ESTABLISHED:
 		connstate = 1;
-		ib_query_qp(ia->ri_id->qp, attr,
-			    IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
-			    iattr);
-		dprintk("RPC:       %s: %d responder resources"
-			" (%d initiator)\n",
-			__func__, attr->max_dest_rd_atomic,
-			attr->max_rd_atomic);
 		rpcrdma_update_connect_private(xprt, &event->param.conn);
 		goto connected;
 	case RDMA_CM_EVENT_CONNECT_ERROR:
@@ -298,11 +290,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 		connstate = -ENETDOWN;
 		goto connected;
 	case RDMA_CM_EVENT_REJECTED:
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-		pr_info("rpcrdma: connection to %pIS:%u on %s rejected: %s\n",
-			sap, rpc_get_port(sap), ia->ri_device->name,
+		dprintk("rpcrdma: connection to %pIS:%u rejected: %s\n",
+			sap, rpc_get_port(sap),
 			rdma_reject_msg(id, event->status));
-#endif
 		connstate = -ECONNREFUSED;
 		if (event->status == IB_CM_REJ_STALE_CONN)
 			connstate = -EAGAIN;
@@ -310,37 +300,19 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 	case RDMA_CM_EVENT_DISCONNECTED:
 		connstate = -ECONNABORTED;
 connected:
-		dprintk("RPC:       %s: %sconnected\n",
-					__func__, connstate > 0 ? "" : "dis");
 		atomic_set(&xprt->rx_buf.rb_credits, 1);
 		ep->rep_connected = connstate;
 		rpcrdma_conn_func(ep);
 		wake_up_all(&ep->rep_connect_wait);
 		/*FALLTHROUGH*/
 	default:
-		dprintk("RPC:       %s: %pIS:%u (ep 0x%p): %s\n",
-			__func__, sap, rpc_get_port(sap), ep,
-			rdma_event_msg(event->event));
+		dprintk("RPC:       %s: %pIS:%u on %s/%s (ep 0x%p): %s\n",
+			__func__, sap, rpc_get_port(sap),
+			ia->ri_device->name, ia->ri_ops->ro_displayname,
+			ep, rdma_event_msg(event->event));
 		break;
 	}
 
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-	if (connstate == 1) {
-		int ird = attr->max_dest_rd_atomic;
-		int tird = ep->rep_remote_cma.responder_resources;
-
-		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
-			sap, rpc_get_port(sap),
-			ia->ri_device->name,
-			ia->ri_ops->ro_displayname,
-			xprt->rx_buf.rb_max_requests,
-			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
-	} else if (connstate < 0) {
-		pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
-			sap, rpc_get_port(sap), connstate);
-	}
-#endif
-
 	return 0;
 }
 
@@ -971,7 +943,6 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
 	if (req == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	INIT_LIST_HEAD(&req->rl_free);
 	spin_lock(&buffer->rb_reqslock);
 	list_add(&req->rl_all, &buffer->rb_allreqs);
 	spin_unlock(&buffer->rb_reqslock);
@@ -1033,6 +1004,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 	spin_lock_init(&buf->rb_recovery_lock);
 	INIT_LIST_HEAD(&buf->rb_mws);
 	INIT_LIST_HEAD(&buf->rb_all);
+	INIT_LIST_HEAD(&buf->rb_pending);
 	INIT_LIST_HEAD(&buf->rb_stale_mrs);
 	INIT_DELAYED_WORK(&buf->rb_refresh_worker,
 			  rpcrdma_mr_refresh_worker);
@@ -1055,7 +1027,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 			goto out;
 		}
 		req->rl_backchannel = false;
-		list_add(&req->rl_free, &buf->rb_send_bufs);
+		list_add(&req->rl_list, &buf->rb_send_bufs);
 	}
 
 	INIT_LIST_HEAD(&buf->rb_recv_bufs);
@@ -1084,8 +1056,8 @@ rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf)
 	struct rpcrdma_req *req;
 
 	req = list_first_entry(&buf->rb_send_bufs,
-			       struct rpcrdma_req, rl_free);
-	list_del(&req->rl_free);
+			       struct rpcrdma_req, rl_list);
+	list_del_init(&req->rl_list);
 	return req;
 }
 
@@ -1187,6 +1159,7 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
 
 	if (!mw)
 		goto out_nomws;
+	mw->mw_flags = 0;
 	return mw;
 
 out_nomws:
@@ -1267,7 +1240,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
 
 	spin_lock(&buffers->rb_lock);
 	buffers->rb_send_count--;
-	list_add_tail(&req->rl_free, &buffers->rb_send_bufs);
+	list_add_tail(&req->rl_list, &buffers->rb_send_bufs);
 	if (rep) {
 		buffers->rb_recv_count--;
 		list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);

diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 1d66acf..b282d3f 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h

@@ -271,6 +271,7 @@ struct rpcrdma_mw {
 	struct scatterlist	*mw_sg;
 	int			mw_nents;
 	enum dma_data_direction	mw_dir;
+	unsigned long		mw_flags;
 	union {
 		struct rpcrdma_fmr	fmr;
 		struct rpcrdma_frmr	frmr;
@@ -282,6 +283,11 @@ struct rpcrdma_mw {
 	struct list_head	mw_all;
 };
 
+/* mw_flags */
+enum {
+	RPCRDMA_MW_F_RI		= 1,
+};
+
 /*
  * struct rpcrdma_req -- structure central to the request/reply sequence.
  *
@@ -334,7 +340,8 @@ enum {
 
 struct rpcrdma_buffer;
 struct rpcrdma_req {
-	struct list_head	rl_free;
+	struct list_head	rl_list;
+	__be32			rl_xid;
 	unsigned int		rl_mapped_sges;
 	unsigned int		rl_connect_cookie;
 	struct rpcrdma_buffer	*rl_buffer;
@@ -396,6 +403,7 @@ struct rpcrdma_buffer {
 	int			rb_send_count, rb_recv_count;
 	struct list_head	rb_send_bufs;
 	struct list_head	rb_recv_bufs;
+	struct list_head	rb_pending;
 	u32			rb_max_requests;
 	atomic_t		rb_credits;	/* most recent credit grant */
 
@@ -461,7 +469,7 @@ struct rpcrdma_memreg_ops {
 				  struct rpcrdma_mr_seg *, int, bool,
 				  struct rpcrdma_mw **);
 	void		(*ro_unmap_sync)(struct rpcrdma_xprt *,
-					 struct rpcrdma_req *);
+					 struct list_head *);
 	void		(*ro_unmap_safe)(struct rpcrdma_xprt *,
 					 struct rpcrdma_req *, bool);
 	void		(*ro_recover_mr)(struct rpcrdma_mw *);
@@ -544,6 +552,34 @@ void rpcrdma_destroy_req(struct rpcrdma_req *);
 int rpcrdma_buffer_create(struct rpcrdma_xprt *);
 void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
 
+static inline void
+rpcrdma_insert_req(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
+{
+	spin_lock(&buffers->rb_lock);
+	if (list_empty(&req->rl_list))
+		list_add_tail(&req->rl_list, &buffers->rb_pending);
+	spin_unlock(&buffers->rb_lock);
+}
+
+static inline struct rpcrdma_req *
+rpcrdma_lookup_req_locked(struct rpcrdma_buffer *buffers, __be32 xid)
+{
+	struct rpcrdma_req *pos;
+
+	list_for_each_entry(pos, &buffers->rb_pending, rl_list)
+		if (pos->rl_xid == xid)
+			return pos;
+	return NULL;
+}
+
+static inline void
+rpcrdma_remove_req(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
+{
+	spin_lock(&buffers->rb_lock);
+	list_del(&req->rl_list);
+	spin_unlock(&buffers->rb_lock);
+}
+
 struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
 void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
 struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 9c65058..87246be 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile

@@ -207,6 +207,7 @@
 # useless for BPF samples.
 $(obj)/%.o: $(src)/%.c
 	$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
+		-I$(srctree)/tools/testing/selftests/bpf/ \
 		-D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
 		-Wno-compare-distinct-pointer-types \
 		-Wno-gnu-variable-sized-type-not-at-end \

diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c
index aa243db..be0d4a5 100644
--- a/samples/kfifo/dma-example.c
+++ b/samples/kfifo/dma-example.c

@@ -75,8 +75,8 @@ static int __init example_init(void)
 	for (i = 0; i < nents; i++) {
 		printk(KERN_INFO
 		"sg[%d] -> "
-		"page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n",
-			i, sg[i].page_link, sg[i].offset, sg[i].length);
+		"page %p offset 0x%.8x length 0x%.8x\n",
+			i, sg_page(&sg[i]), sg[i].offset, sg[i].length);
 
 		if (sg_is_last(&sg[i]))
 			break;
@@ -104,8 +104,8 @@ static int __init example_init(void)
 	for (i = 0; i < nents; i++) {
 		printk(KERN_INFO
 		"sg[%d] -> "
-		"page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n",
-			i, sg[i].page_link, sg[i].offset, sg[i].length);
+		"page %p offset 0x%.8x length 0x%.8x\n",
+			i, sg_page(&sg[i]), sg[i].offset, sg[i].length);
 
 		if (sg_is_last(&sg[i]))
 			break;

diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index c583a1e..343d586 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst

@@ -23,15 +23,12 @@
 		 $(filter-out $(srcdir)/,\
 		 $(sort $(dir $(wildcard $(srcdir)/*/)))))
 
-# caller may set destination dir (when installing to asm/)
-_dst          := $(if $(dst),$(dst),$(obj))
-
 # Recursion
 __headers: $(subdirs)
 
 .PHONY: $(subdirs)
 $(subdirs):
-	$(Q)$(MAKE) $(hdr-inst)=$(obj)/$@ dst=$(_dst)/$@
+	$(Q)$(MAKE) $(hdr-inst)=$(obj)/$@ dst=$(dst)/$@
 
 # Skip header install/check for include/uapi and arch/$(hdr-arch)/include/uapi.
 # We have only sub-directories there.
@@ -39,21 +36,12 @@
 
 ifeq ($(skip-inst),)
 
-# generated header directory
-gen := $(if $(gen),$(gen),$(subst include/,include/generated/,$(obj)))
-
 # Kbuild file is optional
 kbuild-file := $(srctree)/$(obj)/Kbuild
 -include $(kbuild-file)
 
-old-kbuild-file := $(srctree)/$(subst uapi/,,$(obj))/Kbuild
-ifneq ($(wildcard $(old-kbuild-file)),)
-include $(old-kbuild-file)
-endif
-
-installdir    := $(INSTALL_HDR_PATH)/$(subst uapi/,,$(_dst))
-
-gendir        := $(objtree)/$(gen)
+installdir    := $(INSTALL_HDR_PATH)/$(dst)
+gendir        := $(objtree)/$(subst include/,include/generated/,$(obj))
 header-files  := $(notdir $(wildcard $(srcdir)/*.h))
 header-files  += $(notdir $(wildcard $(srcdir)/*.agh))
 header-files  := $(filter-out $(no-export-headers), $(header-files))
@@ -64,14 +52,8 @@
 install-file  := $(installdir)/.install
 check-file    := $(installdir)/.check
 
-# generic-y list all files an architecture uses from asm-generic
-# Use this to build a list of headers which require a wrapper
-generic-files := $(notdir $(wildcard $(srctree)/include/uapi/asm-generic/*.h))
-wrapper-files := $(filter $(generic-files), $(generic-y))
-wrapper-files := $(filter-out $(header-files), $(wrapper-files))
-
 # all headers files for this dir
-all-files     := $(header-files) $(genhdr-files) $(wrapper-files)
+all-files     := $(header-files) $(genhdr-files)
 output-files  := $(addprefix $(installdir)/, $(all-files))
 
 ifneq ($(mandatory-y),)
@@ -95,9 +77,6 @@
       cmd_install = \
         $(CONFIG_SHELL) $< $(installdir) $(srcdir) $(header-files); \
         $(CONFIG_SHELL) $< $(installdir) $(gendir) $(genhdr-files); \
-        for F in $(wrapper-files); do                                   \
-                echo "\#include <asm-generic/$$F>" > $(installdir)/$$F;    \
-        done;                                                           \
         touch $@
 
 quiet_cmd_remove = REMOVE  $(unwanted)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 3a225d0..2287a0b 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl

@@ -57,7 +57,7 @@
 my $codespellfile = "/usr/share/codespell/dictionary.txt";
 my $conststructsfile = "$D/const_structs.checkpatch";
 my $typedefsfile = "";
-my $color = 1;
+my $color = "auto";
 my $allow_c99_comments = 1;
 
 sub help {
@@ -116,7 +116,8 @@
                              (default:/usr/share/codespell/dictionary.txt)
   --codespellfile            Use this codespell dictionary
   --typedefsfile             Read additional types from this file
-  --color                    Use colors when output is STDOUT (default: on)
+  --color[=WHEN]             Use colors 'always', 'never', or only when output
+                             is a terminal ('auto'). Default is 'auto'.
   -h, --help, --version      display this help and exit
 
 When FILE is - read standard input.
@@ -182,6 +183,14 @@
 	unshift(@ARGV, @conf_args) if @conf_args;
 }
 
+# Perl's Getopt::Long allows options to take optional arguments after a space.
+# Prevent --color by itself from consuming other arguments
+foreach (@ARGV) {
+	if ($_ eq "--color" || $_ eq "-color") {
+		$_ = "--color=$color";
+	}
+}
+
 GetOptions(
 	'q|quiet+'	=> \$quiet,
 	'tree!'		=> \$tree,
@@ -212,7 +221,9 @@
 	'codespell!'	=> \$codespell,
 	'codespellfile=s'	=> \$codespellfile,
 	'typedefsfile=s'	=> \$typedefsfile,
-	'color!'	=> \$color,
+	'color=s'	=> \$color,
+	'no-color'	=> \$color,	#keep old behaviors of -nocolor
+	'nocolor'	=> \$color,	#keep old behaviors of -nocolor
 	'h|help'	=> \$help,
 	'version'	=> \$help
 ) or help(1);
@@ -238,6 +249,18 @@
 	push(@ARGV, '-');
 }
 
+if ($color =~ /^[01]$/) {
+	$color = !$color;
+} elsif ($color =~ /^always$/i) {
+	$color = 1;
+} elsif ($color =~ /^never$/i) {
+	$color = 0;
+} elsif ($color =~ /^auto$/i) {
+	$color = (-t STDOUT);
+} else {
+	die "Invalid color mode: $color\n";
+}
+
 sub hash_save_array_words {
 	my ($hashRef, $arrayRef) = @_;
 
@@ -733,7 +756,7 @@
 
 our $declaration_macros = qr{(?x:
 	(?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,6}\s*\(|
-	(?:$Storage\s+)?LIST_HEAD\s*\(|
+	(?:$Storage\s+)?[HLP]?LIST_HEAD\s*\(|
 	(?:$Storage\s+)?${Type}\s+uninitialized_var\s*\(
 )};
 
@@ -867,6 +890,7 @@
 #		    echo "commit $(cut -c 1-12,41-)"
 #		done
 	} elsif ($lines[0] =~ /^fatal: ambiguous argument '$commit': unknown revision or path not in the working tree\./) {
+		$id = undef;
 	} else {
 		$id = substr($lines[0], 0, 12);
 		$desc = substr($lines[0], 41);
@@ -1882,7 +1906,7 @@
 		return 0;
 	}
 	my $output = '';
-	if (-t STDOUT && $color) {
+	if ($color) {
 		if ($level eq 'ERROR') {
 			$output .= RED;
 		} elsif ($level eq 'WARNING') {
@@ -1893,10 +1917,10 @@
 	}
 	$output .= $prefix . $level . ':';
 	if ($show_types) {
-		$output .= BLUE if (-t STDOUT && $color);
+		$output .= BLUE if ($color);
 		$output .= "$type:";
 	}
-	$output .= RESET if (-t STDOUT && $color);
+	$output .= RESET if ($color);
 	$output .= ' ' . $msg . "\n";
 
 	if ($showfile) {
@@ -2606,7 +2630,8 @@
 			($id, $description) = git_commit_info($orig_commit,
 							      $id, $orig_desc);
 
-			if ($short || $long || $space || $case || ($orig_desc ne $description) || !$hasparens) {
+			if (defined($id) &&
+			   ($short || $long || $space || $case || ($orig_desc ne $description) || !$hasparens)) {
 				ERROR("GIT_COMMIT_ID",
 				      "Please use git commit description style 'commit <12+ chars of sha1> (\"<title line>\")' - ie: '${init_char}ommit $id (\"$description\")'\n" . $herecurr);
 			}
@@ -2776,6 +2801,17 @@
 			#print "is_start<$is_start> is_end<$is_end> length<$length>\n";
 		}
 
+# check for MAINTAINERS entries that don't have the right form
+		if ($realfile =~ /^MAINTAINERS$/ &&
+		    $rawline =~ /^\+[A-Z]:/ &&
+		    $rawline !~ /^\+[A-Z]:\t\S/) {
+			if (WARN("MAINTAINERS_STYLE",
+				 "MAINTAINERS entries use one tab after TYPE:\n" . $herecurr) &&
+			    $fix) {
+				$fixed[$fixlinenr] =~ s/^(\+[A-Z]):\s*/$1:\t/;
+			}
+		}
+
 # discourage the use of boolean for type definition attributes of Kconfig options
 		if ($realfile =~ /Kconfig/ &&
 		    $line =~ /^\+\s*\bboolean\b/) {
@@ -2957,7 +2993,7 @@
 
 # check multi-line statement indentation matches previous line
 		if ($^V && $^V ge 5.10.0 &&
-		    $prevline =~ /^\+([ \t]*)((?:$c90_Keywords(?:\s+if)\s*)|(?:$Declare\s*)?(?:$Ident|\(\s*\*\s*$Ident\s*\))\s*|$Ident\s*=\s*$Ident\s*)\(.*(\&\&|\|\||,)\s*$/) {
+		    $prevline =~ /^\+([ \t]*)((?:$c90_Keywords(?:\s+if)\s*)|(?:$Declare\s*)?(?:$Ident|\(\s*\*\s*$Ident\s*\))\s*|(?:\*\s*)*$Lval\s*=\s*$Ident\s*)\(.*(\&\&|\|\||,)\s*$/) {
 			$prevline =~ /^\+(\t*)(.*)$/;
 			my $oldindent = $1;
 			my $rest = $2;
@@ -3208,7 +3244,7 @@
 		my ($stat, $cond, $line_nr_next, $remain_next, $off_next,
 		    $realline_next);
 #print "LINE<$line>\n";
-		if ($linenr >= $suppress_statement &&
+		if ($linenr > $suppress_statement &&
 		    $realcnt && $sline =~ /.\s*\S/) {
 			($stat, $cond, $line_nr_next, $remain_next, $off_next) =
 				ctx_statement_block($linenr, $realcnt, 0);
@@ -3542,7 +3578,7 @@
 				$fixedline =~ s/\s*=\s*$/ = {/;
 				fix_insert_line($fixlinenr, $fixedline);
 				$fixedline = $line;
-				$fixedline =~ s/^(.\s*){\s*/$1/;
+				$fixedline =~ s/^(.\s*)\{\s*/$1/;
 				fix_insert_line($fixlinenr, $fixedline);
 			}
 		}
@@ -3883,7 +3919,7 @@
 				my $fixedline = rtrim($prevrawline) . " {";
 				fix_insert_line($fixlinenr, $fixedline);
 				$fixedline = $rawline;
-				$fixedline =~ s/^(.\s*){\s*/$1\t/;
+				$fixedline =~ s/^(.\s*)\{\s*/$1\t/;
 				if ($fixedline !~ /^\+\s*$/) {
 					fix_insert_line($fixlinenr, $fixedline);
 				}
@@ -4372,7 +4408,7 @@
 			if (ERROR("SPACING",
 				  "space required before the open brace '{'\n" . $herecurr) &&
 			    $fix) {
-				$fixed[$fixlinenr] =~ s/^(\+.*(?:do|\))){/$1 {/;
+				$fixed[$fixlinenr] =~ s/^(\+.*(?:do|\)))\{/$1 {/;
 			}
 		}
 
@@ -4904,17 +4940,17 @@
 			foreach my $arg (@def_args) {
 			        next if ($arg =~ /\.\.\./);
 			        next if ($arg =~ /^type$/i);
-				my $tmp = $define_stmt;
-				$tmp =~ s/\b(typeof|__typeof__|__builtin\w+|typecheck\s*\(\s*$Type\s*,|\#+)\s*\(*\s*$arg\s*\)*\b//g;
-				$tmp =~ s/\#+\s*$arg\b//g;
-				$tmp =~ s/\b$arg\s*\#\#//g;
-				my $use_cnt = $tmp =~ s/\b$arg\b//g;
+				my $tmp_stmt = $define_stmt;
+				$tmp_stmt =~ s/\b(typeof|__typeof__|__builtin\w+|typecheck\s*\(\s*$Type\s*,|\#+)\s*\(*\s*$arg\s*\)*\b//g;
+				$tmp_stmt =~ s/\#+\s*$arg\b//g;
+				$tmp_stmt =~ s/\b$arg\s*\#\#//g;
+				my $use_cnt = $tmp_stmt =~ s/\b$arg\b//g;
 				if ($use_cnt > 1) {
 					CHK("MACRO_ARG_REUSE",
 					    "Macro argument reuse '$arg' - possible side-effects?\n" . "$herectx");
 				    }
 # check if any macro arguments may have other precedence issues
-				if ($define_stmt =~ m/($Operators)?\s*\b$arg\b\s*($Operators)?/m &&
+				if ($tmp_stmt =~ m/($Operators)?\s*\b$arg\b\s*($Operators)?/m &&
 				    ((defined($1) && $1 ne ',') ||
 				     (defined($2) && $2 ne ','))) {
 					CHK("MACRO_ARG_PRECEDENCE",
@@ -5311,7 +5347,7 @@
 			my ($s, $c) = ctx_statement_block($linenr - 3, $realcnt, 0);
 #			print("line: <$line>\nprevline: <$prevline>\ns: <$s>\nc: <$c>\n\n\n");
 
-			if ($c =~ /(?:^|\n)[ \+]\s*(?:$Type\s*)?\Q$testval\E\s*=\s*(?:\([^\)]*\)\s*)?\s*(?:devm_)?(?:[kv][czm]alloc(?:_node|_array)?\b|kstrdup|(?:dev_)?alloc_skb)/) {
+			if ($s =~ /(?:^|\n)[ \+]\s*(?:$Type\s*)?\Q$testval\E\s*=\s*(?:\([^\)]*\)\s*)?\s*(?:devm_)?(?:[kv][czm]alloc(?:_node|_array)?\b|kstrdup|kmemdup|(?:dev_)?alloc_skb)/) {
 				WARN("OOM_MESSAGE",
 				     "Possible unnecessary 'out of memory' message\n" . $hereprev);
 			}
@@ -5540,10 +5576,18 @@
 			    "architecture specific defines should be avoided\n" .  $herecurr);
 		}
 
-# Check that the storage class is at the beginning of a declaration
-		if ($line =~ /\b$Storage\b/ && $line !~ /^.\s*$Storage\b/) {
+# check that the storage class is not after a type
+		if ($line =~ /\b($Type)\s+($Storage)\b/) {
 			WARN("STORAGE_CLASS",
-			     "storage class should be at the beginning of the declaration\n" . $herecurr)
+			     "storage class '$2' should be located before type '$1'\n" . $herecurr);
+		}
+# Check that the storage class is at the beginning of a declaration
+		if ($line =~ /\b$Storage\b/ &&
+		    $line !~ /^.\s*$Storage/ &&
+		    $line =~ /^.\s*(.+?)$Storage\s/ &&	# interpolate $Storage; "\$" only matched a literal '$'
+		    $1 !~ /[\,\)]\s*$/) {		# $1 is the text preceding the storage class
+			WARN("STORAGE_CLASS",
+			     "storage class should be at the beginning of the declaration\n" . $herecurr);
 		}
 
 # check the location of the inline attribute, that it is between
@@ -5886,7 +5930,8 @@
 			     "externs should be avoided in .c files\n" .  $herecurr);
 		}
 
-		if ($realfile =~ /\.[ch]$/ && defined $stat &&
+# check for function declarations that have arguments without identifier names
+		if (defined $stat &&
 		    $stat =~ /^.\s*(?:extern\s+)?$Type\s*$Ident\s*\(\s*([^{]+)\s*\)\s*;/s &&
 		    $1 ne "void") {
 			my $args = trim($1);
@@ -5899,6 +5944,29 @@
 			}
 		}
 
+# check for function definitions
+		if ($^V && $^V ge 5.10.0 &&
+		    defined $stat &&
+		    $stat =~ /^.\s*(?:$Storage\s+)?$Type\s*($Ident)\s*$balanced_parens\s*{/s) {
+			$context_function = $1;
+
+# check for multiline function definition with misplaced open brace
+			my $ok = 0;
+			my $cnt = statement_rawlines($stat);
+			my $herectx = $here . "\n";
+			for (my $n = 0; $n < $cnt; $n++) {
+				my $rl = raw_line($linenr, $n);
+				$herectx .=  $rl . "\n";
+				$ok = 1 if ($rl =~ /^[ \+]\{/);
+				$ok = 1 if ($rl =~ /\{/ && $n == 0);
+				last if $rl =~ /^[ \+].*\{/;
+			}
+			if (!$ok) {
+				ERROR("OPEN_BRACE",
+				      "open brace '{' following function definitions go on the next line\n" . $herectx);
+			}
+		}
+
 # checks for new __setup's
 		if ($rawline =~ /\b__setup\("([^"]*)"/) {
 			my $name = $1;

diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in
index 7986f4e..7aad824 100644
--- a/scripts/gdb/linux/constants.py.in
+++ b/scripts/gdb/linux/constants.py.in

@@ -14,6 +14,7 @@
 
 #include <linux/fs.h>
 #include <linux/mount.h>
+#include <linux/of_fdt.h>
 
 /* We need to stringify expanded macros so that they can be parsed */
 
@@ -50,3 +51,9 @@
 LX_VALUE(MNT_NOATIME)
 LX_VALUE(MNT_NODIRATIME)
 LX_VALUE(MNT_RELATIME)
+
+/* <linux/of_fdt.h> */
+LX_VALUE(OF_DT_HEADER)
+
+/* Kernel Configs */
+LX_CONFIG(CONFIG_OF)

diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py
index 5afd109..6d2e09a 100644
--- a/scripts/gdb/linux/dmesg.py
+++ b/scripts/gdb/linux/dmesg.py

@@ -12,6 +12,7 @@
 #
 
 import gdb
+import sys
 
 from linux import utils
 
@@ -24,7 +25,7 @@
 
     def invoke(self, arg, from_tty):
         log_buf_addr = int(str(gdb.parse_and_eval(
-            "'printk.c'::log_buf")).split()[0], 16)
+            "(void *)'printk.c'::log_buf")).split()[0], 16)
         log_first_idx = int(gdb.parse_and_eval("'printk.c'::log_first_idx"))
         log_next_idx = int(gdb.parse_and_eval("'printk.c'::log_next_idx"))
         log_buf_len = int(gdb.parse_and_eval("'printk.c'::log_buf_len"))
@@ -52,13 +53,19 @@
                 continue
 
             text_len = utils.read_u16(log_buf[pos + 10:pos + 12])
-            text = log_buf[pos + 16:pos + 16 + text_len].decode()
+            text = log_buf[pos + 16:pos + 16 + text_len].decode(
+                encoding='utf8', errors='replace')
             time_stamp = utils.read_u64(log_buf[pos:pos + 8])
 
             for line in text.splitlines():
-                gdb.write("[{time:12.6f}] {line}\n".format(
+                msg = u"[{time:12.6f}] {line}\n".format(
                     time=time_stamp / 1000000000.0,
-                    line=line))
+                    line=line)
+                # With python2 gdb.write will attempt to convert unicode to
+                # ascii and might fail so pass an utf8-encoded str instead.
+                if sys.hexversion < 0x03000000:
+                    msg = msg.encode(encoding='utf8', errors='replace')
+                gdb.write(msg)
 
             pos += length
 

diff --git a/scripts/gdb/linux/proc.py b/scripts/gdb/linux/proc.py
index 38b1f09..086d272 100644
--- a/scripts/gdb/linux/proc.py
+++ b/scripts/gdb/linux/proc.py

@@ -16,6 +16,7 @@
 from linux import utils
 from linux import tasks
 from linux import lists
+from struct import *
 
 
 class LxCmdLine(gdb.Command):
@@ -195,3 +196,75 @@
                         info_opts(MNT_INFO, m_flags)))
 
 LxMounts()
+
+
+class LxFdtDump(gdb.Command):
+    """Output Flattened Device Tree header and dump FDT blob to the filename
+       specified as the command argument. Equivalent to
+       'cat /proc/fdt > fdtdump.dtb' on a running target"""
+
+    def __init__(self):
+        super(LxFdtDump, self).__init__("lx-fdtdump", gdb.COMMAND_DATA,
+                                        gdb.COMPLETE_FILENAME)
+
+    def fdthdr_to_cpu(self, fdt_header):
+        """Pack the 7 header fields big-endian, unpack in target order."""
+        fdt_header_be = ">IIIIIII"
+        fdt_header_le = "<IIIIIII"
+
+        if utils.get_target_endianness() == 1:
+            output_fmt = fdt_header_le
+        else:
+            output_fmt = fdt_header_be
+
+        return unpack(output_fmt, pack(fdt_header_be,
+                                       fdt_header['magic'],
+                                       fdt_header['totalsize'],
+                                       fdt_header['off_dt_struct'],
+                                       fdt_header['off_dt_strings'],
+                                       fdt_header['off_mem_rsvmap'],
+                                       fdt_header['version'],
+                                       fdt_header['last_comp_version']))
+
+    def invoke(self, arg, from_tty):
+        """Print the FDT header, then write the blob to the file in arg."""
+        if not constants.LX_CONFIG_OF:
+            raise gdb.GdbError("Kernel not compiled with CONFIG_OF\n")
+
+        if len(arg) == 0:
+            filename = "fdtdump.dtb"
+        else:
+            filename = arg
+
+        py_fdt_header_ptr = gdb.parse_and_eval(
+            "(const struct fdt_header *) initial_boot_params")
+        py_fdt_header = py_fdt_header_ptr.dereference()
+
+        fdt_header = self.fdthdr_to_cpu(py_fdt_header)
+
+        if fdt_header[0] != constants.LX_OF_DT_HEADER:
+            raise gdb.GdbError("No flattened device tree magic found\n")
+
+        gdb.write("fdt_magic:         0x{:02X}\n".format(fdt_header[0]))
+        gdb.write("fdt_totalsize:     0x{:02X}\n".format(fdt_header[1]))
+        gdb.write("off_dt_struct:     0x{:02X}\n".format(fdt_header[2]))
+        gdb.write("off_dt_strings:    0x{:02X}\n".format(fdt_header[3]))
+        gdb.write("off_mem_rsvmap:    0x{:02X}\n".format(fdt_header[4]))
+        gdb.write("version:           {}\n".format(fdt_header[5]))
+        gdb.write("last_comp_version: {}\n".format(fdt_header[6]))
+
+        inf = gdb.inferiors()[0]
+        fdt_buf = utils.read_memoryview(inf, py_fdt_header_ptr,
+                                        fdt_header[1]).tobytes()
+
+        try:
+            f = open(filename, 'wb')
+        except (IOError, OSError):  # not bare: let Ctrl-C/SystemExit through
+            raise gdb.GdbError("Could not open file to dump fdt")
+
+        with f:  # close the file even if the write raises
+            f.write(fdt_buf)
+
+        gdb.write("Dumped fdt blob to " + filename + "\n")
+
+LxFdtDump()

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 30d752a..48397fe 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c

@@ -2126,6 +2126,7 @@ static void add_header(struct buffer *b, struct module *mod)
 	buf_printf(b, "#include <linux/compiler.h>\n");
 	buf_printf(b, "\n");
 	buf_printf(b, "MODULE_INFO(vermagic, VERMAGIC_STRING);\n");
+	buf_printf(b, "MODULE_INFO(name, KBUILD_MODNAME);\n");
 	buf_printf(b, "\n");
 	buf_printf(b, "__visible struct module __this_module\n");
 	buf_printf(b, "__attribute__((section(\".gnu.linkonce.this_module\"))) = {\n");

diff --git a/security/Kconfig b/security/Kconfig
index d540bfe..e8e4494 100644
--- a/security/Kconfig
+++ b/security/Kconfig

@@ -163,6 +163,13 @@
 	  been removed. This config is intended to be used only while
 	  trying to find such users.
 
+config FORTIFY_SOURCE
+	bool "Harden common str/mem functions against buffer overflows"
+	depends on ARCH_HAS_FORTIFY_SOURCE
+	help
+	  Detect overflows of buffers in common string and memory functions
+	  where the compiler can determine and validate the buffer sizes.
+
 config STATIC_USERMODEHELPER
 	bool "Force all usermode helper calls through a single binary"
 	help

diff --git a/security/keys/compat_dh.c b/security/keys/compat_dh.c
index a6a659b..aa6b34c 100644
--- a/security/keys/compat_dh.c
+++ b/security/keys/compat_dh.c

@@ -33,6 +33,8 @@ long compat_keyctl_dh_compute(struct keyctl_dh_params __user *params,
 	kdfcopy.hashname = compat_ptr(compat_kdfcopy.hashname);
 	kdfcopy.otherinfo = compat_ptr(compat_kdfcopy.otherinfo);
 	kdfcopy.otherinfolen = compat_kdfcopy.otherinfolen;
+	memcpy(kdfcopy.__spare, compat_kdfcopy.__spare,
+	       sizeof(kdfcopy.__spare));
 
 	return __keyctl_dh_compute(params, buffer, buflen, &kdfcopy);
 }

diff --git a/security/keys/dh.c b/security/keys/dh.c
index 4755d4b..d1ea9f3 100644
--- a/security/keys/dh.c
+++ b/security/keys/dh.c

@@ -266,6 +266,11 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params,
 	if (kdfcopy) {
 		char *hashname;
 
+		if (memchr_inv(kdfcopy->__spare, 0, sizeof(kdfcopy->__spare))) {
+			ret = -EINVAL;
+			goto out1;
+		}
+
 		if (buflen > KEYCTL_KDF_MAX_OUTPUT_LEN ||
 		    kdfcopy->otherinfolen > KEYCTL_KDF_MAX_OI_LEN) {
 			ret = -EMSGSIZE;

diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index b3d5bed..22995cb 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c

@@ -238,10 +238,8 @@ static bool hw_support_mmap(struct snd_pcm_substream *substream)
 {
 	if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP))
 		return false;
-	/* check architectures that return -EINVAL from dma_mmap_coherent() */
-	/* FIXME: this should be some global flag */
-#if defined(CONFIG_C6X) || defined(CONFIG_FRV) || defined(CONFIG_MN10300) ||\
-	defined(CONFIG_PARISC) || defined(CONFIG_XTENSA)
+	/* architecture supports dma_mmap_coherent()? */
+#if defined(CONFIG_ARCH_NO_COHERENT_DMA_MMAP) || !defined(CONFIG_HAS_DMA)
 	if (!substream->ops->mmap &&
 	    substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV)
 		return false;
@@ -3502,7 +3500,7 @@ int snd_pcm_lib_default_mmap(struct snd_pcm_substream *substream,
 	}
 #endif /* CONFIG_GENERIC_ALLOCATOR */
 #ifndef CONFIG_X86 /* for avoiding warnings arch/x86/mm/pat.c */
-	if (!substream->ops->page &&
+	if (IS_ENABLED(CONFIG_HAS_DMA) && !substream->ops->page &&
 	    substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV)
 		return dma_mmap_coherent(substream->dma_buffer.dev.dev,
 					 area,

diff --git a/sound/drivers/opl4/opl4_lib.c b/sound/drivers/opl4/opl4_lib.c
index bc345d5..db76a5b 100644
--- a/sound/drivers/opl4/opl4_lib.c
+++ b/sound/drivers/opl4/opl4_lib.c

@@ -29,7 +29,7 @@ MODULE_AUTHOR("Clemens Ladisch <clemens@ladisch.de>");
 MODULE_DESCRIPTION("OPL4 driver");
 MODULE_LICENSE("GPL");
 
-static void inline snd_opl4_wait(struct snd_opl4 *opl4)
+static inline void snd_opl4_wait(struct snd_opl4 *opl4)
 {
 	int timeout = 10;
 	while ((inb(opl4->fm_port) & OPL4_STATUS_BUSY) && --timeout > 0)

diff --git a/sound/isa/msnd/msnd_midi.c b/sound/isa/msnd/msnd_midi.c
index 912b5a9..013d8d1 100644
--- a/sound/isa/msnd/msnd_midi.c
+++ b/sound/isa/msnd/msnd_midi.c

@@ -120,24 +120,24 @@ void snd_msndmidi_input_read(void *mpuv)
 	unsigned long flags;
 	struct snd_msndmidi *mpu = mpuv;
 	void *pwMIDQData = mpu->dev->mappedbase + MIDQ_DATA_BUFF;
+	u16 head, tail, size;
 
 	spin_lock_irqsave(&mpu->input_lock, flags);
-	while (readw(mpu->dev->MIDQ + JQS_wTail) !=
-	       readw(mpu->dev->MIDQ + JQS_wHead)) {
-		u16 wTmp, val;
-		val = readw(pwMIDQData + 2 * readw(mpu->dev->MIDQ + JQS_wHead));
+	head = readw(mpu->dev->MIDQ + JQS_wHead);
+	tail = readw(mpu->dev->MIDQ + JQS_wTail);
+	size = readw(mpu->dev->MIDQ + JQS_wSize);
+	if (head > size || tail > size)
+		goto out;
+	while (head != tail) {
+		unsigned char val = readw(pwMIDQData + 2 * head);
 
-			if (test_bit(MSNDMIDI_MODE_BIT_INPUT_TRIGGER,
-				     &mpu->mode))
-				snd_rawmidi_receive(mpu->substream_input,
-						    (unsigned char *)&val, 1);
-
-		wTmp = readw(mpu->dev->MIDQ + JQS_wHead) + 1;
-		if (wTmp > readw(mpu->dev->MIDQ + JQS_wSize))
-			writew(0,  mpu->dev->MIDQ + JQS_wHead);
-		else
-			writew(wTmp,  mpu->dev->MIDQ + JQS_wHead);
+		if (test_bit(MSNDMIDI_MODE_BIT_INPUT_TRIGGER, &mpu->mode))
+			snd_rawmidi_receive(mpu->substream_input, &val, 1);
+		if (++head > size)
+			head = 0;
+		writew(head, mpu->dev->MIDQ + JQS_wHead);
 	}
+ out:
 	spin_unlock_irqrestore(&mpu->input_lock, flags);
 }
 EXPORT_SYMBOL(snd_msndmidi_input_read);

diff --git a/sound/isa/msnd/msnd_pinnacle.c b/sound/isa/msnd/msnd_pinnacle.c
index ad48973..fc4fb19 100644
--- a/sound/isa/msnd/msnd_pinnacle.c
+++ b/sound/isa/msnd/msnd_pinnacle.c

@@ -170,23 +170,24 @@ static irqreturn_t snd_msnd_interrupt(int irq, void *dev_id)
 {
 	struct snd_msnd *chip = dev_id;
 	void *pwDSPQData = chip->mappedbase + DSPQ_DATA_BUFF;
+	u16 head, tail, size;
 
 	/* Send ack to DSP */
 	/* inb(chip->io + HP_RXL); */
 
 	/* Evaluate queued DSP messages */
-	while (readw(chip->DSPQ + JQS_wTail) != readw(chip->DSPQ + JQS_wHead)) {
-		u16 wTmp;
-
-		snd_msnd_eval_dsp_msg(chip,
-			readw(pwDSPQData + 2 * readw(chip->DSPQ + JQS_wHead)));
-
-		wTmp = readw(chip->DSPQ + JQS_wHead) + 1;
-		if (wTmp > readw(chip->DSPQ + JQS_wSize))
-			writew(0, chip->DSPQ + JQS_wHead);
-		else
-			writew(wTmp, chip->DSPQ + JQS_wHead);
+	head = readw(chip->DSPQ + JQS_wHead);
+	tail = readw(chip->DSPQ + JQS_wTail);
+	size = readw(chip->DSPQ + JQS_wSize);
+	if (head > size || tail > size)
+		goto out;
+	while (head != tail) {
+		snd_msnd_eval_dsp_msg(chip, readw(pwDSPQData + 2 * head));
+		if (++head > size)
+			head = 0;
+		writew(head, chip->DSPQ + JQS_wHead);
 	}
+ out:
 	/* Send ack to DSP */
 	inb(chip->io + HP_RXL);
 	return IRQ_HANDLED;

diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 76c85f0..d549f35 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c

@@ -53,9 +53,11 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
 #define is_skylake(codec) ((codec)->core.vendor_id == 0x80862809)
 #define is_broxton(codec) ((codec)->core.vendor_id == 0x8086280a)
 #define is_kabylake(codec) ((codec)->core.vendor_id == 0x8086280b)
+#define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \
+				((codec)->core.vendor_id == 0x80862800))
 #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
 				|| is_skylake(codec) || is_broxton(codec) \
-				|| is_kabylake(codec))
+				|| is_kabylake(codec) || is_geminilake(codec))
 
 #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
 #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
@@ -3790,6 +3792,7 @@ HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI",	patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI",	patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI",	patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI",	patch_i915_glk_hdmi),
+HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI",	patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI",	patch_generic_hdmi),
 HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI",	patch_i915_byt_hdmi),
 HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI",	patch_i915_byt_hdmi),

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index cd6987b..45d58fc 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c

@@ -379,6 +379,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
 		break;
 	case 0x10ec0899:
 	case 0x10ec0900:
+	case 0x10ec1168:
 	case 0x10ec1220:
 		alc_update_coef_idx(codec, 0x7, 1<<1, 0);
 		break;
@@ -5179,6 +5180,7 @@ enum {
 	ALC233_FIXUP_ASUS_MIC_NO_PRESENCE,
 	ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE,
 	ALC233_FIXUP_LENOVO_MULTI_CODECS,
+	ALC294_FIXUP_LENOVO_MIC_LOCATION,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5962,6 +5964,18 @@ static const struct hda_fixup alc269_fixups[] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc233_alc662_fixup_lenovo_dual_codecs,
 	},
+	[ALC294_FIXUP_LENOVO_MIC_LOCATION] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			/* Change the mic location from front to right, otherwise there are
+			   two front mics with the same name, pulseaudio can't handle them.
+			   This is just a temporary workaround, after applying this fixup,
+			   there will be one "Front Mic" and one "Mic" in this machine.
+			 */
+			{ 0x1a, 0x04a19040 },
+			{ }
+		},
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6143,6 +6157,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460),
 	SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
+	SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
 	SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
 	SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
@@ -7801,6 +7816,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
 	HDA_CODEC_ENTRY(0x10ec0892, "ALC892", patch_alc662),
 	HDA_CODEC_ENTRY(0x10ec0899, "ALC898", patch_alc882),
 	HDA_CODEC_ENTRY(0x10ec0900, "ALC1150", patch_alc882),
+	HDA_CODEC_ENTRY(0x10ec1168, "ALC1220", patch_alc882),
 	HDA_CODEC_ENTRY(0x10ec1220, "ALC1220", patch_alc882),
 	{} /* terminator */
 };

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 0a8a1c4..a1497c5 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c

@@ -643,7 +643,7 @@ static const struct {
 	{ "__GFP_FS",			"F" },
 	{ "__GFP_COLD",			"CO" },
 	{ "__GFP_NOWARN",		"NWR" },
-	{ "__GFP_REPEAT",		"R" },
+	{ "__GFP_RETRY_MAYFAIL",	"R" },
 	{ "__GFP_NOFAIL",		"NF" },
 	{ "__GFP_NORETRY",		"NR" },
 	{ "__GFP_COMP",			"C" },

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 2ca51a8..153c3a1 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile

@@ -37,6 +37,5 @@
 
 %.o: %.c
 	$(CLANG) -I. -I./include/uapi -I../../../include/uapi \
-		-I../../../../samples/bpf/ \
 		-Wno-compare-distinct-pointer-types \
 		-O2 -target bpf -c $< -o $@

diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h
index 487cbfb..74af266 100644
--- a/tools/testing/selftests/bpf/bpf_endian.h
+++ b/tools/testing/selftests/bpf/bpf_endian.h

@@ -23,11 +23,19 @@
 # define __bpf_htons(x)			__builtin_bswap16(x)
 # define __bpf_constant_ntohs(x)	___constant_swab16(x)
 # define __bpf_constant_htons(x)	___constant_swab16(x)
+# define __bpf_ntohl(x)			__builtin_bswap32(x)
+# define __bpf_htonl(x)			__builtin_bswap32(x)
+# define __bpf_constant_ntohl(x)	___constant_swab32(x)
+# define __bpf_constant_htonl(x)	___constant_swab32(x)
 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 # define __bpf_ntohs(x)			(x)
 # define __bpf_htons(x)			(x)
 # define __bpf_constant_ntohs(x)	(x)
 # define __bpf_constant_htons(x)	(x)
+# define __bpf_ntohl(x)			(x)
+# define __bpf_htonl(x)			(x)
+# define __bpf_constant_ntohl(x)	(x)
+# define __bpf_constant_htonl(x)	(x)
 #else
 # error "Fix your compiler's __BYTE_ORDER__?!"
 #endif
@@ -38,5 +46,11 @@
 #define bpf_ntohs(x)				\
 	(__builtin_constant_p(x) ?		\
 	 __bpf_constant_ntohs(x) : __bpf_ntohs(x))
+#define bpf_htonl(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_htonl(x) : __bpf_htonl(x))
+#define bpf_ntohl(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_ntohl(x) : __bpf_ntohl(x))
 
 #endif /* __BPF_ENDIAN__ */

diff --git a/samples/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
similarity index 100%
rename from samples/bpf/bpf_helpers.h
rename to tools/testing/selftests/bpf/bpf_helpers.h


diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
new file mode 100644
index 0000000..b9302cc
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc

@@ -0,0 +1,36 @@
+#!/bin/sh
+# description: Kprobe event auto/manual naming
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+disable_events
+echo > kprobe_events
+
+:;: "Add an event on function without name" ;:
+
+FUNC=`grep " [tT] .*vfs_read$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "`
+[ "x" != "x$FUNC" ] || exit_unresolved
+echo "p $FUNC" > kprobe_events
+PROBE_NAME=`echo $FUNC | tr ".:" "_"`
+test -d events/kprobes/p_${PROBE_NAME}_0 || exit_failure
+
+:;: "Add an event on function with new name" ;:
+
+echo "p:event1 $FUNC" > kprobe_events
+test -d events/kprobes/event1 || exit_failure
+
+:;: "Add an event on function with new name and group" ;:
+
+echo "p:kprobes2/event2 $FUNC" > kprobe_events
+test -d events/kprobes2/event2 || exit_failure
+
+:;: "Add an event on dot function without name" ;:
+
+FUNC=`grep -m 10 " [tT] .*\.isra\..*$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "`
+[ "x" != "x$FUNC" ] || exit_unresolved
+echo "p $FUNC" > kprobe_events
+EVENT=`grep $FUNC kprobe_events | cut -f 1 -d " " | cut -f 2 -d:`
+[ "x" != "x$EVENT" ] || exit_failure
+test -d events/$EVENT || exit_failure
+
+echo > kprobe_events

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
new file mode 100644
index 0000000..6d634e4
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc

@@ -0,0 +1,28 @@
+#!/bin/sh
+# description: Kprobe dynamic event - probing module
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+disable_events
+echo > kprobe_events
+
+:;: "Add an event on a module function without specifying event name" ;:
+
+MOD=`lsmod | head -n 2 | tail -n 1 | cut -f1 -d" "`
+FUNC=`grep -m 1 ".* t .*\\[$MOD\\]" /proc/kallsyms | xargs | cut -f3 -d" "`
+[ "x" != "x$MOD" -a "y" != "y$FUNC" ] || exit_unresolved
+echo "p $MOD:$FUNC" > kprobe_events
+PROBE_NAME=`echo $MOD:$FUNC | tr ".:" "_"`
+test -d events/kprobes/p_${PROBE_NAME}_0 || exit_failure
+
+:;: "Add an event on a module function with new event name" ;:
+
+echo "p:event1 $MOD:$FUNC" > kprobe_events
+test -d events/kprobes/event1 || exit_failure
+
+:;: "Add an event on a module function with new event and group name" ;:
+
+echo "p:kprobes1/event1 $MOD:$FUNC" > kprobe_events
+test -d events/kprobes1/event1 || exit_failure
+
+echo > kprobe_events

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index f4d1ff7..2a1cb99 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc

@@ -2,10 +2,10 @@
 # description: Register/unregister many kprobe events
 
 # ftrace fentry skip size depends on the machine architecture.
-# Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc
+# Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le
 case `uname -m` in
   x86_64|i[3456]86) OFFS=5;;
-  ppc*) OFFS=4;;
+  ppc64le) OFFS=8;;
   *) OFFS=0;;
 esac
 

diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index 13f5198..1c12b58 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh

@@ -18,6 +18,7 @@
 
 DEBUGFS=${DEBUGFS-/sys/kernel/debug}
 
+DB_BITMASK=0x7FFF
 PERF_RUN_ORDER=32
 MAX_MW_SIZE=0
 RUN_DMA_TESTS=
@@ -38,6 +39,7 @@
 	echo "be highly recommended."
 	echo
 	echo "Options:"
+	echo "  -b BITMASK      doorbell clear bitmask for ntb_tool"
 	echo "  -C              don't cleanup ntb modules on exit"
 	echo "  -d              run dma tests"
 	echo "  -h              show this help message"
@@ -52,8 +54,9 @@
 function parse_args()
 {
 	OPTIND=0
-	while getopts "Cdhlm:r:p:w:" opt; do
+	while getopts "b:Cdhlm:r:p:w:" opt; do
 		case "$opt" in
+		b)  DB_BITMASK=${OPTARG} ;;
 		C)  DONT_CLEANUP=1 ;;
 		d)  RUN_DMA_TESTS=1 ;;
 		h)  show_help; exit 0 ;;
@@ -85,6 +88,10 @@
 function _modprobe()
 {
         modprobe "$@"
+
+	if [[ "$REMOTE_HOST" != "" ]]; then
+		ssh "$REMOTE_HOST" modprobe "$@"
+	fi
 }
 
 function split_remote()
@@ -154,7 +161,7 @@
 
 	echo "Running db tests on: $(basename $LOC) / $(basename $REM)"
 
-	write_file "c 0xFFFFFFFF" "$REM/db"
+	write_file "c $DB_BITMASK" "$REM/db"
 
 	for ((i=1; i <= 8; i++)); do
 		let DB=$(read_file "$REM/db") || true

diff --git a/tools/testing/selftests/sysctl/Makefile b/tools/testing/selftests/sysctl/Makefile
index b3c33e0..95c320b 100644
--- a/tools/testing/selftests/sysctl/Makefile
+++ b/tools/testing/selftests/sysctl/Makefile

@@ -4,8 +4,7 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests".
 all:
 
-TEST_PROGS := run_numerictests run_stringtests
-TEST_FILES := common_tests
+TEST_PROGS := sysctl.sh
 
 include ../lib.mk
 

diff --git a/tools/testing/selftests/sysctl/common_tests b/tools/testing/selftests/sysctl/common_tests
deleted file mode 100644
index b686232..0000000
--- a/tools/testing/selftests/sysctl/common_tests
+++ /dev/null

@@ -1,131 +0,0 @@
-#!/bin/sh
-
-TEST_FILE=$(mktemp)
-
-echo "== Testing sysctl behavior against ${TARGET} =="
-
-set_orig()
-{
-	echo "${ORIG}" > "${TARGET}"
-}
-
-set_test()
-{
-	echo "${TEST_STR}" > "${TARGET}"
-}
-
-verify()
-{
-	local seen
-	seen=$(cat "$1")
-	if [ "${seen}" != "${TEST_STR}" ]; then
-		return 1
-	fi
-	return 0
-}
-
-exit_test()
-{
-	if [ ! -z ${old_strict} ]; then
-		echo ${old_strict} > ${WRITES_STRICT}
-	fi
-	exit $rc
-}
-
-trap 'set_orig; rm -f "${TEST_FILE}"' EXIT
-
-rc=0
-
-echo -n "Writing test file ... "
-echo "${TEST_STR}" > "${TEST_FILE}"
-if ! verify "${TEST_FILE}"; then
-	echo "FAIL" >&2
-	exit 1
-else
-	echo "ok"
-fi
-
-echo -n "Checking sysctl is not set to test value ... "
-if verify "${TARGET}"; then
-	echo "FAIL" >&2
-	exit 1
-else
-	echo "ok"
-fi
-
-echo -n "Writing sysctl from shell ... "
-set_test
-if ! verify "${TARGET}"; then
-	echo "FAIL" >&2
-	exit 1
-else
-	echo "ok"
-fi
-
-echo -n "Resetting sysctl to original value ... "
-set_orig
-if verify "${TARGET}"; then
-	echo "FAIL" >&2
-	exit 1
-else
-	echo "ok"
-fi
-
-echo -n "Checking write strict setting ... "
-WRITES_STRICT="${SYSCTL}/kernel/sysctl_writes_strict"
-if [ ! -e ${WRITES_STRICT} ]; then
-	echo "FAIL, but skip in case of old kernel" >&2
-else
-	old_strict=$(cat ${WRITES_STRICT})
-	if [ "$old_strict" = "1" ]; then
-		echo "ok"
-	else
-		echo "FAIL, strict value is 0 but force to 1 to continue" >&2
-		echo "1" > ${WRITES_STRICT}
-	fi
-fi
-
-# Now that we've validated the sanity of "set_test" and "set_orig",
-# we can use those functions to set starting states before running
-# specific behavioral tests.
-
-echo -n "Writing entire sysctl in single write ... "
-set_orig
-dd if="${TEST_FILE}" of="${TARGET}" bs=4096 2>/dev/null
-if ! verify "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
-
-echo -n "Writing middle of sysctl after synchronized seek ... "
-set_test
-dd if="${TEST_FILE}" of="${TARGET}" bs=1 seek=1 skip=1 2>/dev/null
-if ! verify "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
-
-echo -n "Writing beyond end of sysctl ... "
-set_orig
-dd if="${TEST_FILE}" of="${TARGET}" bs=20 seek=2 2>/dev/null
-if verify "${TARGET}"; then
-        echo "FAIL" >&2
-        rc=1
-else
-        echo "ok"
-fi
-
-echo -n "Writing sysctl with multiple long writes ... "
-set_orig
-(perl -e 'print "A" x 50;'; echo "${TEST_STR}") | \
-	dd of="${TARGET}" bs=50 2>/dev/null
-if verify "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi

diff --git a/tools/testing/selftests/sysctl/config b/tools/testing/selftests/sysctl/config
new file mode 100644
index 0000000..6ca1480
--- /dev/null
+++ b/tools/testing/selftests/sysctl/config

@@ -0,0 +1 @@
+CONFIG_TEST_SYSCTL=y

diff --git a/tools/testing/selftests/sysctl/run_numerictests b/tools/testing/selftests/sysctl/run_numerictests
deleted file mode 100755
index e6e76c9..0000000
--- a/tools/testing/selftests/sysctl/run_numerictests
+++ /dev/null

@@ -1,10 +0,0 @@
-#!/bin/sh
-
-SYSCTL="/proc/sys"
-TARGET="${SYSCTL}/vm/swappiness"
-ORIG=$(cat "${TARGET}")
-TEST_STR=$(( $ORIG + 1 ))
-
-. ./common_tests
-
-exit_test

diff --git a/tools/testing/selftests/sysctl/run_stringtests b/tools/testing/selftests/sysctl/run_stringtests
deleted file mode 100755
index 857ec66..0000000
--- a/tools/testing/selftests/sysctl/run_stringtests
+++ /dev/null

@@ -1,77 +0,0 @@
-#!/bin/sh
-
-SYSCTL="/proc/sys"
-TARGET="${SYSCTL}/kernel/domainname"
-ORIG=$(cat "${TARGET}")
-TEST_STR="Testing sysctl"
-
-. ./common_tests
-
-# Only string sysctls support seeking/appending.
-MAXLEN=65
-
-echo -n "Writing entire sysctl in short writes ... "
-set_orig
-dd if="${TEST_FILE}" of="${TARGET}" bs=1 2>/dev/null
-if ! verify "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
-
-echo -n "Writing middle of sysctl after unsynchronized seek ... "
-set_test
-dd if="${TEST_FILE}" of="${TARGET}" bs=1 seek=1 2>/dev/null
-if verify "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
-
-echo -n "Checking sysctl maxlen is at least $MAXLEN ... "
-set_orig
-perl -e 'print "A" x ('"${MAXLEN}"'-2), "B";' | \
-	dd of="${TARGET}" bs="${MAXLEN}" 2>/dev/null
-if ! grep -q B "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
-
-echo -n "Checking sysctl keeps original string on overflow append ... "
-set_orig
-perl -e 'print "A" x ('"${MAXLEN}"'-1), "B";' | \
-	dd of="${TARGET}" bs=$(( MAXLEN - 1 )) 2>/dev/null
-if grep -q B "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
-
-echo -n "Checking sysctl stays NULL terminated on write ... "
-set_orig
-perl -e 'print "A" x ('"${MAXLEN}"'-1), "B";' | \
-	dd of="${TARGET}" bs="${MAXLEN}" 2>/dev/null
-if grep -q B "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
-
-echo -n "Checking sysctl stays NULL terminated on overwrite ... "
-set_orig
-perl -e 'print "A" x ('"${MAXLEN}"'-1), "BB";' | \
-	dd of="${TARGET}" bs=$(( $MAXLEN + 1 )) 2>/dev/null
-if grep -q B "${TARGET}"; then
-	echo "FAIL" >&2
-	rc=1
-else
-	echo "ok"
-fi
-
-exit_test

diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
new file mode 100644
index 0000000..ec232c3
--- /dev/null
+++ b/tools/testing/selftests/sysctl/sysctl.sh

@@ -0,0 +1,774 @@
+#!/bin/bash
+# Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or at your option any
+# later version; or, when distributed separately from the Linux kernel or
+# when incorporated into other software packages, subject to the following
+# license:
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of copyleft-next (version 0.3.1 or later) as published
+# at http://copyleft-next.org/.
+
+# This performs a series of tests against the proc sysctl interface.
+
+TEST_NAME="sysctl"
+TEST_DRIVER="test_${TEST_NAME}"
+TEST_DIR=$(dirname $0)
+TEST_FILE=$(mktemp)
+
+# This represents
+#
+# TEST_ID:TEST_COUNT:ENABLED
+#
+# TEST_ID: is the test id number
+# TEST_COUNT: number of times we should run the test
+# ENABLED: 1 if enabled, 0 otherwise
+#
+# Once these are enabled please leave them as-is. Write your own test,
+# we have tons of space.
+ALL_TESTS="0001:1:1"
+ALL_TESTS="$ALL_TESTS 0002:1:1"
+ALL_TESTS="$ALL_TESTS 0003:1:1"
+ALL_TESTS="$ALL_TESTS 0004:1:1"
+ALL_TESTS="$ALL_TESTS 0005:3:1"
+
+test_modprobe()
+{  # installed as the EXIT trap by load_req_mod: complain if $DIR is missing
+       if [ ! -d $DIR ]; then
+               echo "$0: $DIR not present" >&2
+               echo "You must have the following enabled in your kernel:" >&2
+               cat $TEST_DIR/config >&2  # config file shipped next to this script
+               exit 1
+       fi
+}
+
+# Give every tunable a default unless the caller already provided a value.
+# ":=" assigns only when the variable is unset or empty.
+function allow_user_defaults()
+{
+	# Directory tested with -d before the test driver is modprobe'd.
+	: "${DIR:=/sys/module/test_sysctl/}"
+
+	: "${DEFAULT_NUM_TESTS:=50}"
+
+	# Root of the sysctl files the individual tests write to.
+	: "${SYSCTL:=/proc/sys/debug/test_sysctl}"
+
+	# Root of the regular sysctl tree.
+	: "${PROD_SYSCTL:=/proc/sys}"
+
+	# Must come after PROD_SYSCTL, which this default is built from.
+	: "${WRITES_STRICT:=${PROD_SYSCTL}/kernel/sysctl_writes_strict}"
+}
+
+function check_production_sysctl_writes_strict()
+{
+	echo -n "Checking production write strict setting ... "
+	if [ ! -e ${WRITES_STRICT} ]; then
+		echo "FAIL, but skip in case of old kernel" >&2
+	else
+		old_strict=$(cat ${WRITES_STRICT})
+		if [ "$old_strict" = "1" ]; then
+			echo "ok"
+		else
+			echo "FAIL, strict value is 0 but force to 1 to continue" >&2
+			echo "1" > ${WRITES_STRICT}
+		fi
+	fi
+
+	if [ -z $PAGE_SIZE ]; then
+		PAGE_SIZE=$(getconf PAGESIZE)
+	fi
+	if [ -z $MAX_DIGITS ]; then
+		MAX_DIGITS=$(($PAGE_SIZE/8))
+	fi
+	if [ -z $INT_MAX ]; then
+		INT_MAX=$(getconf INT_MAX)
+	fi
+	if [ -z $UINT_MAX ]; then
+		UINT_MAX=$(getconf UINT_MAX)
+	fi
+}
+
+# Check prerequisites before any test runs: the caller must be root (exit 0
+# otherwise) and perl, getconf and diff must be in PATH (exit 1 otherwise).
+test_reqs()
+{
+	local tool
+
+	uid=$(id -u)
+	if [ $uid -ne 0 ]; then
+		# Prefix with the script name, like the messages below.
+		echo "$0: must be run as root" >&2
+		exit 0
+	fi
+
+	# Same message format and exit status for every missing tool.
+	for tool in perl getconf diff; do
+		if ! which $tool 2> /dev/null > /dev/null; then
+			echo "$0: You need $tool installed"
+			exit 1
+		fi
+	done
+}
+
+function load_req_mod()
+{  # modprobe $TEST_DRIVER unless $DIR already exists
+	trap "test_modprobe" EXIT  # test_modprobe runs whenever the script exits
+
+	if [ ! -d $DIR ]; then
+		modprobe $TEST_DRIVER
+		if [ $? -ne 0 ]; then
+			exit  # modprobe failed; the EXIT trap above still fires
+		fi
+	fi
+}
+
+reset_vals()
+{  # write a fixed per-sysctl value into $TARGET before a test touches it
+	VAL=""
+	TRIGGER=$(basename ${TARGET})  # the file name selects the value below
+	case "$TRIGGER" in
+		int_0001)  # NOTE(review): values presumably mirror the test module's defaults - confirm
+			VAL="60"
+			;;
+		int_0002)
+			VAL="1"
+			;;
+		uint_0001)
+			VAL="314"
+			;;
+		string_0001)
+			VAL="(none)"
+			;;
+		*)  # any other name leaves VAL empty
+			;;
+	esac
+	echo -n $VAL > $TARGET  # -n: written without a trailing newline
+}
+
+set_orig()
+{  # write $ORIG back into $TARGET; does nothing while $TARGET is empty
+	if [ -n "$TARGET" ]; then  # quoted so an empty or odd path cannot confuse [ ]
+		echo "${ORIG}" > "${TARGET}"
+	fi
+}
+
+set_test()
+{
+	echo "${TEST_STR}" > "${TARGET}"
+}
+
+# verify FILE
+# Succeed (0) when FILE's contents, as captured by $(cat), equal $TEST_STR;
+# fail (1) otherwise.
+verify()
+{
+	local contents
+	contents=$(cat "$1")
+	[ "${contents}" = "${TEST_STR}" ]
+}
+
+# verify_diff_w FILE: status of diffing $TEST_STR against FILE, whitespace ignored (-w)
+verify_diff_w()
+{
+	echo "$TEST_STR" | diff -q -w -u - "$1"
+}
+
+test_rc()
+{  # abort the whole script with status $rc once a check has set it non-zero
+	[[ $rc == 0 ]] && return
+
+	echo "Failed test, return value: $rc" >&2
+	exit $rc
+}
+
+test_finish()
+{  # undo what the tests changed, then exit with the last recorded $rc
+	set_orig  # put $ORIG back into $TARGET
+	rm -f "${TEST_FILE}"
+
+	if [ ! -z ${old_strict} ]; then  # set only when the strict setting was read earlier
+		echo ${old_strict} > ${WRITES_STRICT}
+	fi
+	exit $rc
+}
+
+run_numerictests()
+{
+	echo "== Testing sysctl behavior against ${TARGET} =="
+
+	rc=0
+
+	echo -n "Writing test file ... "
+	echo "${TEST_STR}" > "${TEST_FILE}"
+	if ! verify "${TEST_FILE}"; then
+		echo "FAIL" >&2
+		exit 1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Checking sysctl is not set to test value ... "
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		exit 1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Writing sysctl from shell ... "
+	set_test
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		exit 1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Resetting sysctl to original value ... "
+	set_orig
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		exit 1
+	else
+		echo "ok"
+	fi
+
+	# Now that we've validated the sanity of "set_test" and "set_orig",
+	# we can use those functions to set starting states before running
+	# specific behavioral tests.
+
+	echo -n "Writing entire sysctl in single write ... "
+	set_orig
+	dd if="${TEST_FILE}" of="${TARGET}" bs=4096 2>/dev/null
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Writing middle of sysctl after synchronized seek ... "
+	set_test
+	dd if="${TEST_FILE}" of="${TARGET}" bs=1 seek=1 skip=1 2>/dev/null
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Writing beyond end of sysctl ... "
+	set_orig
+	dd if="${TEST_FILE}" of="${TARGET}" bs=20 seek=2 2>/dev/null
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Writing sysctl with multiple long writes ... "
+	set_orig
+	(perl -e 'print "A" x 50;'; echo "${TEST_STR}") | \
+		dd of="${TARGET}" bs=50 2>/dev/null
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+}
+
+# Your test must accept digits 3 and 4 to use this
+run_limit_digit()
+{
+	echo -n "Checking ignoring spaces up to PAGE_SIZE works on write ..."
+	reset_vals
+
+	LIMIT=$((MAX_DIGITS -1))
+	TEST_STR="3"
+	(perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+		dd of="${TARGET}" 2>/dev/null
+
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Checking passing PAGE_SIZE of spaces fails on write ..."
+	reset_vals
+
+	LIMIT=$((MAX_DIGITS))
+	TEST_STR="4"
+	(perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+		dd of="${TARGET}" 2>/dev/null
+
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+}
+
+# You are using an int
+run_limit_digit_int()
+{
+	echo -n "Testing INT_MAX works ..."
+	reset_vals
+	TEST_STR="$INT_MAX"
+	echo -n $TEST_STR > $TARGET
+
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing INT_MAX + 1 will fail as expected..."
+	reset_vals
+	let TEST_STR=$INT_MAX+1
+	echo -n $TEST_STR > $TARGET 2> /dev/null
+
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing negative values will work as expected..."
+	reset_vals
+	TEST_STR="-3"
+	echo -n $TEST_STR > $TARGET 2> /dev/null
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+}
+
+# You used an int array
+run_limit_digit_int_array()
+{
+	echo -n "Testing array works as expected ... "
+	TEST_STR="4 3 2 1"
+	echo -n $TEST_STR > $TARGET
+
+	if ! verify_diff_w "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing skipping trailing array elements works ... "
+	# Do not reset_vals, carry on the values from the last test.
+	# If we only echo in two digits the last two are left intact
+	TEST_STR="100 101"
+	echo -n $TEST_STR > $TARGET
+	# After we echo in, to help diff we need to set on TEST_STR what
+	# we expect the result to be.
+	TEST_STR="100 101 2 1"
+
+	if ! verify_diff_w "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing PAGE_SIZE limit on array works ... "
+	# Do not reset_vals, carry on the values from the last test.
+	# Even if you use an int array, you are still restricted to
+	# MAX_DIGITS, this is a known limitation. Test limit works.
+	LIMIT=$((MAX_DIGITS -1))
+	TEST_STR="9"
+	(perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+		dd of="${TARGET}" 2>/dev/null
+
+	TEST_STR="9 101 2 1"
+	if ! verify_diff_w "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing exceeding PAGE_SIZE limit fails as expected ... "
+	# Do not reset_vals, carry on the values from the last test.
+	# Now go over limit.
+	LIMIT=$((MAX_DIGITS))
+	TEST_STR="7"
+	(perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+		dd of="${TARGET}" 2>/dev/null
+
+	TEST_STR="7 101 2 1"
+	if verify_diff_w "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+}
+
+# You are using an unsigned int
+run_limit_digit_uint()
+{
+	echo -n "Testing UINT_MAX works ..."
+	reset_vals
+	TEST_STR="$UINT_MAX"
+	echo -n $TEST_STR > $TARGET
+
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing UINT_MAX + 1 will fail as expected..."
+	reset_vals
+	TEST_STR=$(($UINT_MAX+1))
+	echo -n $TEST_STR > $TARGET 2> /dev/null
+
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing negative values will not work as expected ..."
+	reset_vals
+	TEST_STR="-3"
+	echo -n $TEST_STR > $TARGET 2> /dev/null
+
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+}
+
+run_stringtests()
+{
+	# Each step below reports on its own; rc is only evaluated once, by
+	# the single test_rc call at the very end.
+
+	echo -n "Writing entire sysctl in short writes ... "
+	set_orig
+	dd if="${TEST_FILE}" of="${TARGET}" bs=1 2>/dev/null
+	if verify "${TARGET}"; then
+		echo "ok"
+	else
+		echo "FAIL" >&2
+		rc=1
+	fi
+
+	# A successful verify is the failure case for this step.
+	echo -n "Writing middle of sysctl after unsynchronized seek ... "
+	set_test
+	dd if="${TEST_FILE}" of="${TARGET}" bs=1 seek=1 2>/dev/null
+	if ! verify "${TARGET}"; then
+		echo "ok"
+	else
+		echo "FAIL" >&2
+		rc=1
+	fi
+
+	# MAXLEN-2 "A"s plus a trailing "B": the "B" must be readable back.
+	echo -n "Checking sysctl maxlen is at least $MAXLEN ... "
+	set_orig
+	perl -e 'print "A" x ('"${MAXLEN}"'-2), "B";' | \
+		dd of="${TARGET}" bs="${MAXLEN}" 2>/dev/null
+	if grep -q B "${TARGET}"; then
+		echo "ok"
+	else
+		echo "FAIL" >&2
+		rc=1
+	fi
+
+	# In the remaining steps a "B" showing up in the sysctl is a failure.
+	echo -n "Checking sysctl keeps original string on overflow append ... "
+	set_orig
+	perl -e 'print "A" x ('"${MAXLEN}"'-1), "B";' | \
+		dd of="${TARGET}" bs=$(( MAXLEN - 1 )) 2>/dev/null
+	if ! grep -q B "${TARGET}"; then
+		echo "ok"
+	else
+		echo "FAIL" >&2
+		rc=1
+	fi
+
+	echo -n "Checking sysctl stays NULL terminated on write ... "
+	set_orig
+	perl -e 'print "A" x ('"${MAXLEN}"'-1), "B";' | \
+		dd of="${TARGET}" bs="${MAXLEN}" 2>/dev/null
+	if ! grep -q B "${TARGET}"; then
+		echo "ok"
+	else
+		echo "FAIL" >&2
+		rc=1
+	fi
+
+	echo -n "Checking sysctl stays NULL terminated on overwrite ... "
+	set_orig
+	perl -e 'print "A" x ('"${MAXLEN}"'-1), "BB";' | \
+		dd of="${TARGET}" bs=$(( $MAXLEN + 1 )) 2>/dev/null
+	if ! grep -q B "${TARGET}"; then
+		echo "ok"
+	else
+		echo "FAIL" >&2
+		rc=1
+	fi
+
+	test_rc
+}
+
+sysctl_test_0001()
+{
+	# Test 0001 targets int_0001; the test string is the current value
+	# plus one.
+	TARGET="${SYSCTL}/int_0001"
+	reset_vals
+	ORIG=$(cat "$TARGET")
+	TEST_STR=$((ORIG + 1))
+
+	run_numerictests
+	run_limit_digit
+}
+
+sysctl_test_0002()
+{
+	# Test 0002 targets the string sysctl string_0001.
+	TARGET="${SYSCTL}/string_0001"
+	reset_vals
+	ORIG=$(cat "$TARGET")
+	# MAXLEN is consumed by run_stringtests; only string sysctls
+	# support seeking/appending.
+	MAXLEN=65
+	TEST_STR="Testing sysctl"
+
+	run_numerictests
+	run_stringtests
+}
+
+sysctl_test_0003()
+{
+	# Test 0003 targets int_0002 and adds the int-specific limit checks.
+	TARGET="${SYSCTL}/int_0002"
+	reset_vals
+	ORIG=$(cat "$TARGET")
+	TEST_STR=$((ORIG + 1))
+
+	run_numerictests
+	run_limit_digit
+	run_limit_digit_int
+}
+
+sysctl_test_0004()
+{
+	# Test 0004 targets uint_0001 and adds the unsigned limit checks.
+	TARGET="${SYSCTL}/uint_0001"
+	reset_vals
+	ORIG=$(cat "$TARGET")
+	TEST_STR=$((ORIG + 1))
+
+	run_numerictests
+	run_limit_digit
+	run_limit_digit_uint
+}
+
+sysctl_test_0005()
+{
+	# Test 0005 targets int_0003 and only runs the array limit checks.
+	TARGET="${SYSCTL}/int_0003"
+	reset_vals
+	ORIG=$(cat "$TARGET")
+
+	run_limit_digit_int_array
+}
+
+list_tests()
+{
+	echo "Test ID list:"
+	echo
+	echo "TEST_ID x NUM_TEST"
+	echo "TEST_ID:   Test ID"
+	echo "NUM_TESTS: Number of recommended times to run the test"
+	echo
+	echo "0001 x $(get_test_count 0001) - tests proc_dointvec_minmax()"
+	echo "0002 x $(get_test_count 0002) - tests proc_dostring()"
+	echo "0003 x $(get_test_count 0003) - tests proc_dointvec()"
+	echo "0004 x $(get_test_count 0004) - tests proc_douintvec()"
+	echo "0005 x $(get_test_count 0005) - tests proc_douintvec() array"
+}
+
+test_reqs
+
+# Print the command line help followed by the test list, then exit with
+# status 1. The highest valid test ID is derived from the number of
+# space-separated entries in ALL_TESTS.
+usage()
+{
+	NUM_TESTS=$(grep -o ' ' <<<"$ALL_TESTS" | grep -c .)
+	let NUM_TESTS=$NUM_TESTS+1
+	MAX_TEST=$(printf "%04d\n" $NUM_TESTS)
+	echo "Usage: $0 [ -t <4-number-digit> ] | [ -w <4-number-digit> ] |"
+	echo "		 [ -s <4-number-digit> ] | [ -c <4-number-digit> <test-count> ]"
+	echo "           [ all ] [ -h | --help ] [ -l ]"
+	echo ""
+	echo "Valid tests: 0001-$MAX_TEST"
+	echo ""
+	echo "    all     Runs all tests (default)"
+	echo "    -t      Run test ID the recommended number of times"
+	echo "    -w      Watch test ID run until it runs into an error"
+	# -s runs once and -c takes the count, matching parse_args and the
+	# examples printed further down.
+	echo "    -s      Run test ID once"
+	echo "    -c      Run test ID x test-count number of times"
+	echo "    -l      List all test IDs"
+	echo " -h|--help  Help"
+	echo
+	echo "If an error ever occurs execution will immediately terminate."
+	echo "If you are adding a new test try using -w <test-ID> first to"
+	echo "make sure the test passes a series of tests."
+	echo
+	echo Example uses:
+	echo
+	echo "$TEST_NAME.sh            -- executes all tests"
+	echo "$TEST_NAME.sh -t 0002    -- Executes test ID 0002 the recommended number of times"
+	echo "$TEST_NAME.sh -w 0002    -- Watch test ID 0002 run until an error occurs"
+	echo "$TEST_NAME.sh -s 0002    -- Run test ID 0002 once"
+	echo "$TEST_NAME.sh -c 0002 3  -- Run test ID 0002 three times"
+	echo
+	list_tests
+	exit 1
+}
+
+function test_num()
+{
+	# Anything that is not a non-empty run of decimal digits sends the
+	# user to usage.
+	re='^[0-9]+$'
+	[[ $1 =~ $re ]] || usage
+}
+
+function get_test_count()
+{
+	test_num $1
+	# Pick the $1-th whitespace separated entry of ALL_TESTS, drop
+	# everything up to and including its first ':', then drop the last
+	# ':' and whatever follows it; what is left is printed.
+	TEST_DATA=$(echo $ALL_TESTS | awk '{ print $'$1' }')
+	LAST_TWO=${TEST_DATA#*:}
+	echo ${LAST_TWO%:*}
+}
+
+function get_test_enabled()
+{
+	local enabled_field
+
+	test_num $1
+	# Pick the $1-th whitespace separated entry of ALL_TESTS and print
+	# what follows its second ':'.
+	TEST_DATA=$(echo $ALL_TESTS | awk '{ print $'$1' }')
+	enabled_field=${TEST_DATA#*:*:}
+	echo $enabled_field
+}
+
+function run_all_tests()
+{
+	# Walk every entry of ALL_TESTS; the test ID is the entry with its
+	# last two ':'-separated fields removed.
+	for i in $ALL_TESTS ; do
+		TEST_ID=${i%:*:*}
+		TEST_COUNT=$(get_test_count $TEST_ID)
+		ENABLED=$(get_test_enabled $TEST_ID)
+		if ! [[ $ENABLED -eq "1" ]]; then
+			continue
+		fi
+		test_case $TEST_ID $TEST_COUNT
+	done
+}
+
+function watch_log()
+{
+	# $1 is the run number, $2 the test name. The screen is cleared
+	# unless exactly three arguments were given (test_case passes a
+	# third "noclear" argument).
+	[ $# -eq 3 ] || clear
+	date
+	echo "Running test: $2 - run #$1"
+}
+
+# Run a single test (when exactly one argument, the test ID, is given) or
+# all tests, over and over. The loop itself never terminates.
+function watch_case()
+{
+	# Keep the run counter in a dedicated local: run_all_tests and
+	# test_case both assign the global "i", which used to clobber the
+	# counter and break the increment below in "watch all" mode.
+	local watch_iter=0
+
+	while true; do
+
+		if [ $# -eq 1 ]; then
+			test_num $1
+			watch_log $watch_iter ${TEST_NAME}_test_$1
+			${TEST_NAME}_test_$1
+		else
+			watch_log $watch_iter all
+			run_all_tests
+		fi
+		watch_iter=$((watch_iter + 1))
+	done
+}
+
+function test_case()
+{
+	# $1 is the test ID; an explicit second argument overrides the
+	# default repeat count.
+	if [ $# -eq 2 ]; then
+		NUM_TESTS=$2
+	else
+		NUM_TESTS=$DEFAULT_NUM_TESTS
+	fi
+
+	i=0
+	while [ $i -lt $NUM_TESTS ]; do
+		test_num $1
+		RUN_TEST=${TEST_NAME}_test_$1
+		# Third argument keeps watch_log from clearing the screen.
+		watch_log $i $RUN_TEST noclear
+		$RUN_TEST
+		i=$(($i + 1))
+	done
+}
+
+function parse_args()
+{
+	# No arguments at all means "run everything".
+	if [ $# -eq 0 ]; then
+		run_all_tests
+		return
+	fi
+
+	case "$1" in
+	all)
+		run_all_tests
+		;;
+	-w)
+		shift
+		watch_case $@
+		;;
+	-t)
+		shift
+		test_num $1
+		test_case $1 $(get_test_count $1)
+		;;
+	-c)
+		shift
+		test_num $1
+		test_num $2
+		test_case $1 $2
+		;;
+	-s)
+		shift
+		test_case $1 1
+		;;
+	-l)
+		list_tests
+		;;
+	*)
+		# -h, --help and anything unrecognised all end up here.
+		usage
+		;;
+	esac
+}
+
+# Script entry point: run the setup helpers, register test_finish to run
+# on exit, then dispatch on the command line.
+test_reqs
+allow_user_defaults
+check_production_sysctl_writes_strict
+load_req_mod
+
+trap "test_finish" EXIT
+
+# Quote "$@" so each argument reaches parse_args exactly as the user
+# typed it, without word splitting or pathname expansion.
+parse_args "$@"
+
+exit 0

diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index 5801bbe..a9b8613 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile

@@ -9,7 +9,7 @@
 
 TEST_GEN_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex adjtick change_skew \
 		      skew_consistency clocksource-switch freq-step leap-a-day \
-		      leapcrash set-tai set-2038 set-tz
+		      leapcrash set-tai set-2038 set-tz rtctest_setdate
 
 
 include ../lib.mk

diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c
index 4230d30..f61170f 100644
--- a/tools/testing/selftests/timers/rtctest.c
+++ b/tools/testing/selftests/timers/rtctest.c

@@ -21,6 +21,9 @@
 #include <stdlib.h>
 #include <errno.h>
 
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
 
 /*
  * This expects the new RTC class driver framework, working with
@@ -29,23 +32,84 @@
  */
 static const char default_rtc[] = "/dev/rtc0";
 
+/*
+ * Dates written to the RTC by the "dangerous" part of the test. Fields
+ * that are not listed are zero, so every entry is at 00:00:00 and tm_mon
+ * defaults to January. The table is not const because main() advances
+ * tm_sec of each entry before programming the alarm.
+ */
+static struct rtc_time cutoff_dates[] = {
+	{
+		.tm_year = 70, /* 1970 -1900 */
+		.tm_mday = 1,
+	},
+	/* signed time_t 19/01/2038 3:14:08 */
+	{
+		.tm_year = 138,
+		.tm_mday = 19,
+	},
+	{
+		.tm_year = 138,
+		.tm_mday = 20,
+	},
+	{
+		.tm_year = 199, /* 2099 -1900 */
+		.tm_mday = 1,
+	},
+	{
+		.tm_year = 200, /* 2100 -1900 */
+		.tm_mday = 1,
+	},
+	/* unsigned time_t 07/02/2106 7:28:15*/
+	{
+		/* was 205 (year 2105), which missed the boundary named above */
+		.tm_year = 206, /* 2106 -1900 */
+		.tm_mon = 1,
+		.tm_mday = 7,
+	},
+	{
+		.tm_year = 206,
+		.tm_mon = 1,
+		.tm_mday = 8,
+	},
+	/* signed time on 64bit in nanoseconds 12/04/2262 01:47:16*/
+	{
+		.tm_year = 362,
+		.tm_mon = 3,
+		.tm_mday = 12,
+	},
+	{
+		.tm_year = 362, /* 2262 -1900 */
+		.tm_mon = 3,
+		.tm_mday = 13,
+	},
+};
+
+/*
+ * Compare the value handed to the RTC (a) with the value read back (b).
+ *
+ * Year, month, day, hour and minute must be identical. The seconds read
+ * back may be at most one ahead of what was written, and must not be
+ * behind it.
+ *
+ * Returns 0 when the two are considered equal, 1 otherwise.
+ */
+static int compare_dates(const struct rtc_time *a, const struct rtc_time *b)
+{
+	int sec_drift = b->tm_sec - a->tm_sec;
+
+	if (a->tm_year != b->tm_year ||
+	    a->tm_mon != b->tm_mon ||
+	    a->tm_mday != b->tm_mday ||
+	    a->tm_hour != b->tm_hour ||
+	    a->tm_min != b->tm_min)
+		return 1;
+
+	/* A read-back earlier than the written value is a mismatch too. */
+	if (sec_drift < 0 || sec_drift > 1)
+		return 1;
+
+	return 0;
+}
 
 int main(int argc, char **argv)
 {
-	int i, fd, retval, irqcount = 0;
+	int i, fd, retval, irqcount = 0, dangerous = 0;
 	unsigned long tmp, data;
 	struct rtc_time rtc_tm;
 	const char *rtc = default_rtc;
 	struct timeval start, end, diff;
 
 	switch (argc) {
+	case 3:
+		if (*argv[2] == 'd')
+			dangerous = 1;
 	case 2:
 		rtc = argv[1];
 		/* FALLTHROUGH */
 	case 1:
 		break;
 	default:
-		fprintf(stderr, "usage:  rtctest [rtcdev]\n");
+		fprintf(stderr, "usage:  rtctest [rtcdev] [d]\n");
 		return 1;
 	}
 
@@ -202,7 +266,7 @@ int main(int argc, char **argv)
 		/* not all RTCs support periodic IRQs */
 		if (errno == EINVAL) {
 			fprintf(stderr, "\nNo periodic IRQ support\n");
-			goto done;
+			goto test_DATE;
 		}
 		perror("RTC_IRQP_READ ioctl");
 		exit(errno);
@@ -221,7 +285,7 @@ int main(int argc, char **argv)
 			if (errno == EINVAL) {
 				fprintf(stderr,
 					"\n...Periodic IRQ rate is fixed\n");
-				goto done;
+				goto test_DATE;
 			}
 			perror("RTC_IRQP_SET ioctl");
 			exit(errno);
@@ -269,6 +333,62 @@ int main(int argc, char **argv)
 		}
 	}
 
+test_DATE:
+	if (!dangerous)
+		goto done;
+
+	fprintf(stderr, "\nTesting problematic dates\n");
+
+	for (i = 0; i < ARRAY_SIZE(cutoff_dates); i++) {
+		struct rtc_time current;
+
+		/* Write the new date in RTC */
+		retval = ioctl(fd, RTC_SET_TIME, &cutoff_dates[i]);
+		if (retval == -1) {
+			perror("RTC_SET_TIME ioctl");
+			close(fd);
+			exit(errno);
+		}
+
+		/* Read back */
+		retval = ioctl(fd, RTC_RD_TIME, &current);
+		if (retval == -1) {
+			perror("RTC_RD_TIME ioctl");
+			exit(errno);
+		}
+
+		if(compare_dates(&cutoff_dates[i], &current)) {
+			fprintf(stderr,"Setting date %d failed\n",
+			        cutoff_dates[i].tm_year + 1900);
+			goto done;
+		}
+
+		cutoff_dates[i].tm_sec += 5;
+
+		/* Write the new alarm in RTC */
+		retval = ioctl(fd, RTC_ALM_SET, &cutoff_dates[i]);
+		if (retval == -1) {
+			perror("RTC_ALM_SET ioctl");
+			close(fd);
+			exit(errno);
+		}
+
+		/* Read back */
+		retval = ioctl(fd, RTC_ALM_READ, &current);
+		if (retval == -1) {
+			perror("RTC_ALM_READ ioctl");
+			exit(errno);
+		}
+
+		if(compare_dates(&cutoff_dates[i], &current)) {
+			fprintf(stderr,"Setting alarm %d failed\n",
+			        cutoff_dates[i].tm_year + 1900);
+			goto done;
+		}
+
+		fprintf(stderr, "Setting year %d is OK \n",
+			cutoff_dates[i].tm_year + 1900);
+	}
 done:
 	fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
 

diff --git a/tools/testing/selftests/timers/rtctest_setdate.c b/tools/testing/selftests/timers/rtctest_setdate.c
new file mode 100644
index 0000000..2cb7848
--- /dev/null
+++ b/tools/testing/selftests/timers/rtctest_setdate.c

@@ -0,0 +1,86 @@
+/* Real Time Clock Driver Test
+ *	by: Benjamin Gaignard (benjamin.gaignard@linaro.org)
+ *
+ * To build
+ *	gcc rtctest_setdate.c -o rtctest_setdate
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <linux/rtc.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+
+static const char default_time[] = "00:00:00";
+
+/*
+ * Set the RTC given as argv[1] to the date in argv[2] (DD-MM-YYYY) and
+ * the optional time in argv[3] (HH:MM:SS, default 00:00:00), then read
+ * the time back and print it.
+ *
+ * Returns 0 on success, 1 on bad usage or unparsable input; on a failed
+ * system call the process exits with that call's errno.
+ */
+int main(int argc, char **argv)
+{
+	int fd, retval, err;
+	/* zeroed so fields the parser never fills are not garbage */
+	struct rtc_time new = { 0 }, current;
+	const char *rtc, *date;
+	const char *time = default_time;
+
+	switch (argc) {
+	case 4:
+		time = argv[3];
+		/* FALLTHROUGH */
+	case 3:
+		date = argv[2];
+		rtc = argv[1];
+		break;
+	default:
+		fprintf(stderr, "usage: rtctest_setdate <rtcdev> <DD-MM-YYYY> [HH:MM:SS]\n");
+		return 1;
+	}
+
+	/* Parse and validate the input before touching the device. */
+	if (sscanf(date, "%d-%d-%d",
+		   &new.tm_mday, &new.tm_mon, &new.tm_year) != 3) {
+		fprintf(stderr, "invalid date '%s', expected DD-MM-YYYY\n",
+			date);
+		return 1;
+	}
+	new.tm_mon -= 1;
+	new.tm_year -= 1900;
+
+	if (sscanf(time, "%d:%d:%d",
+		   &new.tm_hour, &new.tm_min, &new.tm_sec) != 3) {
+		fprintf(stderr, "invalid time '%s', expected HH:MM:SS\n",
+			time);
+		return 1;
+	}
+
+	fd = open(rtc, O_RDONLY);
+	if (fd == -1) {
+		/* perror() may modify errno, so latch it first */
+		err = errno;
+		perror(rtc);
+		exit(err);
+	}
+
+	fprintf(stderr, "Test will set RTC date/time to %d-%d-%d, %02d:%02d:%02d.\n",
+		new.tm_mday, new.tm_mon + 1, new.tm_year + 1900,
+		new.tm_hour, new.tm_min, new.tm_sec);
+
+	/* Write the new date in RTC */
+	retval = ioctl(fd, RTC_SET_TIME, &new);
+	if (retval == -1) {
+		err = errno;
+		perror("RTC_SET_TIME ioctl");
+		close(fd);
+		exit(err);
+	}
+
+	/* Read back */
+	retval = ioctl(fd, RTC_RD_TIME, &current);
+	if (retval == -1) {
+		err = errno;
+		perror("RTC_RD_TIME ioctl");
+		close(fd);
+		exit(err);
+	}
+
+	fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
+		current.tm_mday, current.tm_mon + 1, current.tm_year + 1900,
+		current.tm_hour, current.tm_min, current.tm_sec);
+
+	close(fd);
+	return 0;
+}

diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 37d9118..d99850c 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c

@@ -51,6 +51,22 @@ static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
 	return vfio_group;
 }
 
+/*
+ * Call vfio_external_group_match_file(group, filep) through a symbol
+ * looked up with symbol_get() and hand back its result. Returns false
+ * when the symbol cannot be obtained.
+ */
+static bool kvm_vfio_external_group_match_file(struct vfio_group *group,
+					       struct file *filep)
+{
+	bool (*match)(struct vfio_group *, struct file *);
+	bool matched = false;
+
+	match = symbol_get(vfio_external_group_match_file);
+	if (match) {
+		matched = match(group, filep);
+		/* drop the reference taken by symbol_get() above */
+		symbol_put(vfio_external_group_match_file);
+	}
+
+	return matched;
+}
+
 static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
 {
 	void (*fn)(struct vfio_group *);
@@ -231,37 +247,31 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 		if (!f.file)
 			return -EBADF;
 
-		vfio_group = kvm_vfio_group_get_external_user(f.file);
-		fdput(f);
-
-		if (IS_ERR(vfio_group))
-			return PTR_ERR(vfio_group);
-
 		ret = -ENOENT;
 
 		mutex_lock(&kv->lock);
 
 		list_for_each_entry(kvg, &kv->group_list, node) {
-			if (kvg->vfio_group != vfio_group)
+			if (!kvm_vfio_external_group_match_file(kvg->vfio_group,
+								f.file))
 				continue;
 
 			list_del(&kvg->node);
+			kvm_arch_end_assignment(dev->kvm);
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+			kvm_spapr_tce_release_vfio_group(dev->kvm,
+							 kvg->vfio_group);
+#endif
+			kvm_vfio_group_set_kvm(kvg->vfio_group, NULL);
 			kvm_vfio_group_put_external_user(kvg->vfio_group);
 			kfree(kvg);
 			ret = 0;
 			break;
 		}
 
-		kvm_arch_end_assignment(dev->kvm);
-
 		mutex_unlock(&kv->lock);
 
-#ifdef CONFIG_SPAPR_TCE_IOMMU
-		kvm_spapr_tce_release_vfio_group(dev->kvm, vfio_group);
-#endif
-		kvm_vfio_group_set_kvm(vfio_group, NULL);
-
-		kvm_vfio_group_put_external_user(vfio_group);
+		fdput(f);
 
 		kvm_vfio_update_coherency(dev);
commit	deed9deb62f9121ea16f06351d1e968685a74863	[log] [tgz]
author	Linus Torvalds <torvalds@linux-foundation.org>	Fri Jul 14 15:33:15 2017 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	Fri Jul 14 15:33:15 2017 -0700
tree	124b1282c78c4aba97f53694ba0bc0489c014a8c
parent	ccd5d1b91f22351b55feb6fdee504cb84d97752f [diff]
parent	01e6a61aceb82e13bec29502a8eb70d9574f97ad [diff]