Merge branch 'akpm' (second patch-bomb from Andrew)

Merge second patchbomb from Andrew Morton:
 - the rest of MM
 - misc fs fixes
 - add execveat() syscall
 - new ratelimit feature for fault-injection
 - decompressor updates
 - ipc/ updates
 - fallocate feature creep
 - fsnotify cleanups
 - a few other misc things

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (99 commits)
  cgroups: Documentation: fix trivial typos and wrong paragraph numberings
  parisc: percpu: update comments referring to __get_cpu_var
  percpu: update local_ops.txt to reflect this_cpu operations
  percpu: remove __get_cpu_var and __raw_get_cpu_var macros
  fsnotify: remove destroy_list from fsnotify_mark
  fsnotify: unify inode and mount marks handling
  fallocate: create FAN_MODIFY and IN_MODIFY events
  mm/cma: make kmemleak ignore CMA regions
  slub: fix cpuset check in get_any_partial
  slab: fix cpuset check in fallback_alloc
  shmdt: use i_size_read() instead of ->i_size
  ipc/shm.c: fix overly aggressive shmdt() when calls span multiple segments
  ipc/msg: increase MSGMNI, remove scaling
  ipc/sem.c: increase SEMMSL, SEMMNI, SEMOPM
  ipc/sem.c: change memory barrier in sem_lock() to smp_rmb()
  lib/decompress.c: consistency of compress formats for kernel image
  decompress_bunzip2: off by one in get_next_block()
  usr/Kconfig: make initrd compression algorithm selection not expert
  fault-inject: add ratelimit option
  ratelimit: add initialization macro
  ...
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index 3171822c..618a33c 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -392,7 +392,12 @@
 locations and some common work such as cleanup has to be done.  If there is no
 cleanup needed then just return directly.
 
-The rationale is:
+Choose label names which say what the goto does or why the goto exists.  An
+example of a good name could be "out_buffer:" if the goto frees "buffer".  Avoid
+using GW-BASIC names like "err1:" and "err2:".  Also don't name them after the
+goto location like "err_kmalloc_failed:".
+
+The rationale for using gotos is:
 
 - unconditional statements are easier to understand and follow
 - nesting is reduced
@@ -403,9 +408,10 @@
 int fun(int a)
 {
 	int result = 0;
-	char *buffer = kmalloc(SIZE);
+	char *buffer;
 
-	if (buffer == NULL)
+	buffer = kmalloc(SIZE, GFP_KERNEL);
+	if (!buffer)
 		return -ENOMEM;
 
 	if (condition1) {
@@ -413,14 +419,25 @@
 			...
 		}
 		result = 1;
-		goto out;
+		goto out_buffer;
 	}
 	...
-out:
+out_buffer:
 	kfree(buffer);
 	return result;
 }
 
+A common type of bug to be aware of is "one err bugs" which look like this:
+
+err:
+	kfree(foo->bar);
+	kfree(foo);
+	return ret;
+
+The bug in this code is that on some exit paths "foo" is NULL.  Normally the
+fix for this is to split it up into two error labels "err_bar:" and "err_foo:".
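+
+A sketch of the split version, using the same placeholder names as
+above, could look like this:
+
+err_bar:
+	kfree(foo->bar);
+err_foo:
+	kfree(foo);
+	return ret;
+
+Code that runs before "foo" is allocated jumps past both labels (or just
+returns), and code that has not yet allocated "bar" jumps to "err_foo:".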
+
+
 		Chapter 8: Commenting
 
 Comments are good, but there is also a danger of over-commenting.  NEVER
@@ -845,6 +862,49 @@
 	     : /* outputs */ : /* inputs */ : /* clobbers */);
 
 
+		Chapter 20: Conditional Compilation
+
+Wherever possible, don't use preprocessor conditionals (#if, #ifdef) in .c
+files; doing so makes code harder to read and logic harder to follow.  Instead,
+use such conditionals in a header file defining functions for use in those .c
+files, providing no-op stub versions in the #else case, and then call those
+functions unconditionally from .c files.  The compiler will avoid generating
+any code for the stub calls, producing identical results, but the logic will
+remain easy to follow.
+
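+For example, a header could provide something like the following
+(CONFIG_FOO and foo_init() are made-up names, not real kernel symbols):
+
+#ifdef CONFIG_FOO
+void foo_init(void);
+#else
+static inline void foo_init(void) { }	/* no-op stub when CONFIG_FOO=n */
+#endif
+
+so that .c files can call foo_init() unconditionally.
+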
+Prefer to compile out entire functions, rather than portions of functions or
+portions of expressions.  Rather than putting an ifdef in an expression, factor
+out part or all of the expression into a separate helper function and apply the
+conditional to that function.
+
+If you have a function or variable which may potentially go unused in a
+particular configuration, and the compiler would warn about its definition
+going unused, mark the definition as __maybe_unused rather than wrapping it in
+a preprocessor conditional.  (However, if a function or variable *always* goes
+unused, delete it.)
+
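+For instance, a definition such as the following (foo_suspend() is just
+a made-up example) compiles cleanly whether or not anything uses it:
+
+static int __maybe_unused foo_suspend(struct device *dev)
+{
+	return 0;	/* example body */
+}
+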
+Within code, where possible, use the IS_ENABLED macro to convert a Kconfig
+symbol into a C boolean expression, and use it in a normal C conditional:
+
+	if (IS_ENABLED(CONFIG_SOMETHING)) {
+		...
+	}
+
+The compiler will constant-fold the conditional away, and include or exclude
+the block of code just as with an #ifdef, so this will not add any runtime
+overhead.  However, this approach still allows the C compiler to see the code
+inside the block, and check it for correctness (syntax, types, symbol
+references, etc).  Thus, you still have to use an #ifdef if the code inside the
+block references symbols that will not exist if the condition is not met.
+
+At the end of any non-trivial #if or #ifdef block (more than a few lines),
+place a comment after the #endif on the same line, noting the conditional
+expression used.  For instance:
+
+#ifdef CONFIG_SOMETHING
+...
+#endif /* CONFIG_SOMETHING */
+
 
 		Appendix I: References
 
diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt
index f13c913..653d5d7 100644
--- a/Documentation/IPMI.txt
+++ b/Documentation/IPMI.txt
@@ -42,7 +42,13 @@
 properly provides the SMBIOS info for IPMI, the driver will detect it
 and just work.  If you have a board with a standard interface (These
 will generally be either "KCS", "SMIC", or "BT", consult your hardware
-manual), choose the 'IPMI SI handler' option.
+manual), choose the 'IPMI SI handler' option.  A driver also exists
+for direct I2C access to the IPMI management controller.  Some boards
+support this, but it is unknown if it will work on every board.  For
+this, choose 'IPMI SMBus handler', but be ready to do some
+experimenting to see if it will work on your system if the SMBIOS/ACPI
+information is wrong or not present.  It is fairly safe to have both
+these enabled and let the drivers auto-detect what is present.
 
 You should generally enable ACPI on your system, as systems with IPMI
 can have ACPI tables describing them.
@@ -52,7 +58,8 @@
 detected (via ACPI or SMBIOS tables) and should just work.  Sadly,
 many boards do not have this information.  The driver attempts
 standard defaults, but they may not work.  If you fall into this
-situation, you need to read the section below named 'The SI Driver'.
+situation, you need to read the section below named 'The SI Driver' or
+"The SMBus Driver" on how to hand-configure your system.
 
 IPMI defines a standard watchdog timer.  You can enable this with the
 'IPMI Watchdog Timer' config option.  If you compile the driver into
@@ -97,7 +104,12 @@
 as an IPMI user.
 
 ipmi_si - A driver for various system interfaces.  This supports KCS,
-SMIC, and BT interfaces.
+SMIC, and BT interfaces.  Unless you have an SMBus interface or your
+own custom interface, you probably need to use this.
+
+ipmi_ssif - A driver for accessing BMCs on the SMBus. It uses the
+I2C kernel driver's SMBus interfaces to send and receive IPMI messages
+over the SMBus.
 
 ipmi_watchdog - IPMI requires systems to have a very capable watchdog
 timer.  This driver implements the standard Linux watchdog timer
@@ -476,6 +488,62 @@
 only the first three parameters (si type, address type, and address)
 are used for the comparison.  Any options are ignored for removing.
 
+The SMBus Driver (SSIF)
+-----------------------
+
+The SMBus driver allows up to 4 SMBus devices to be configured in the
+system.  By default, the driver will only register with something it
+finds in DMI or ACPI tables.  You can change this
+at module load time (for a module) with:
+
+  modprobe ipmi_ssif
+	addr=<i2caddr1>[,<i2caddr2>[,...]]
+	adapter=<adapter1>[,<adapter2>[...]]
+	dbg=<flags1>,<flags2>...
+	slave_addrs=<addr1>,<addr2>,...
+	[dbg_probe=1]
+
+The addresses are normal I2C addresses.  The adapter is the string
+name of the adapter, as shown in /sys/class/i2c-adapter/i2c-<n>/name.
+It is *NOT* i2c-<n> itself.
+
+The debug flags are bit flags for each BMC found; they are:
+IPMI messages: 1, driver state: 2, timing: 4, I2C probe: 8
+
+Setting dbg_probe to 1 will enable debugging of the probing and
+detection process for BMCs on the SMBusses.
+
+The slave_addrs specifies the IPMI address of the local BMC.  This is
+usually 0x20 and the driver defaults to that, but in case it's not, it
+can be specified when the driver starts up.
+
+Discovering the IPMI compliant BMC on the SMBus can cause devices on
+the I2C bus to fail. The SMBus driver writes a "Get Device ID" IPMI
+message as a block write to the I2C bus and waits for a response.
+This action can be detrimental to some I2C devices. It is highly
+recommended that the known I2C address be given to the SMBus driver in
+the addr parameter unless you have DMI or ACPI data to tell the
+driver what to use.
+
+When compiled into the kernel, the addresses can be specified on the
+kernel command line as:
+
+  ipmi_ssif.addr=<i2caddr1>[,<i2caddr2>[...]]
+	ipmi_ssif.adapter=<adapter1>[,<adapter2>[...]]
+	ipmi_ssif.dbg=<flags1>[,<flags2>[...]]
+	ipmi_ssif.dbg_probe=1
+	ipmi_ssif.slave_addrs=<addr1>[,<addr2>[...]]
+
+These are the same options as on the module command line.
+
+The I2C driver does not support non-blocking access or polling, so
+this driver cannot send IPMI panic events, extend the watchdog at panic
+time, or perform other panic-related IPMI functions without special kernel
+patches and driver modifications.  You can get those at the openipmi
+web page.
+
+The driver supports hot add and remove of interfaces through the I2C
+sysfs interface.
 
 Other Pieces
 ------------
diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.txt
index 38dc06d..4178ebd 100644
--- a/Documentation/arm/memory.txt
+++ b/Documentation/arm/memory.txt
@@ -41,7 +41,7 @@
 fffe0000	fffe7fff	ITCM mapping area for platforms with
 				ITCM mounted inside the CPU.
 
-ffc00000	ffdfffff	Fixmap mapping region.  Addresses provided
+ffc00000	ffefffff	Fixmap mapping region.  Addresses provided
 				by fix_to_virt() will be located here.
 
 fee00000	feffffff	Mapping of PCI I/O space. This is a static
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 10c949b..f935fac 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -312,10 +312,10 @@
  2) mkdir /sys/fs/cgroup/cpuset
  3) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
  4) Create the new cgroup by doing mkdir's and write's (or echo's) in
-    the /sys/fs/cgroup virtual file system.
+    the /sys/fs/cgroup/cpuset virtual file system.
  5) Start a task that will be the "founding father" of the new job.
  6) Attach that task to the new cgroup by writing its PID to the
-    /sys/fs/cgroup/cpuset/tasks file for that cgroup.
+    /sys/fs/cgroup/cpuset tasks file for that cgroup.
  7) fork, exec or clone the job tasks from this founding father task.
 
 For example, the following sequence of commands will setup a cgroup
diff --git a/Documentation/devicetree/bindings/dma/atmel-xdma.txt b/Documentation/devicetree/bindings/dma/atmel-xdma.txt
new file mode 100644
index 0000000..0eb2b32
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/atmel-xdma.txt
@@ -0,0 +1,54 @@
+* Atmel Extensible Direct Memory Access Controller (XDMAC)
+
+* XDMA Controller
+Required properties:
+- compatible: Should be "atmel,<chip>-dma".
+  <chip> compatible description:
+  - sama5d4: first SoC adding the XDMAC
+- reg: Should contain DMA registers location and length.
+- interrupts: Should contain DMA interrupt.
+- #dma-cells: Must be <1>, used to represent the number of integer cells in
+the dmas property of client devices.
+  - The 1st cell specifies the channel configuration register:
+    - bit 13: SIF, source interface identifier, used to get the memory
+    interface identifier,
+    - bit 14: DIF, destination interface identifier, used to get the peripheral
+    interface identifier,
+    - bit 30-24: PERID, peripheral identifier.
+
+Example:
+
+dma1: dma-controller@f0004000 {
+	compatible = "atmel,sama5d4-dma";
+	reg = <0xf0004000 0x200>;
+	interrupts = <50 4 0>;
+	#dma-cells = <1>;
+};
+
+
+* DMA clients
+DMA clients connected to the Atmel XDMA controller must use the format
+described in the dma.txt file, using a one-cell specifier for each channel.
+The two cells in order are:
+1. A phandle pointing to the DMA controller.
+2. Channel configuration register. Configurable fields are:
+    - bit 13: SIF, source interface identifier, used to get the memory
+    interface identifier,
+    - bit 14: DIF, destination interface identifier, used to get the peripheral
+    interface identifier,
+    - bit 30-24: PERID, peripheral identifier.
+
+Example:
+
+i2c2: i2c@f8024000 {
+	compatible = "atmel,at91sam9x5-i2c";
+	reg = <0xf8024000 0x4000>;
+	interrupts = <34 4 6>;
+	dmas = <&dma1
+		(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+		 | AT91_XDMAC_DT_PERID(6))>,
+	       <&dma1
+		(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+		| AT91_XDMAC_DT_PERID(7))>;
+	dma-names = "tx", "rx";
+};
diff --git a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
index 4659fd9..dc8d3aa 100644
--- a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
+++ b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
@@ -48,6 +48,7 @@
 	21	ESAI
 	22	SSI Dual FIFO	(needs firmware ver >= 2)
 	23	Shared ASRC
+	24	SAI
 
 The third cell specifies the transfer priority as below.
 
diff --git a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
index d75a9d7..f8c3311 100644
--- a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
+++ b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
@@ -1,7 +1,9 @@
 QCOM BAM DMA controller
 
 Required properties:
-- compatible: must contain "qcom,bam-v1.4.0" for MSM8974
+- compatible: must be one of the following:
+ * "qcom,bam-v1.4.0" for MSM8974, APQ8074 and APQ8084
+ * "qcom,bam-v1.3.0" for APQ8064, IPQ8064 and MSM8960
 - reg: Address range for DMA registers
 - interrupts: Should contain the one interrupt shared by all channels
 - #dma-cells: must be <1>, the cell in the dmas property of the client device
diff --git a/Documentation/devicetree/bindings/dma/sun6i-dma.txt b/Documentation/devicetree/bindings/dma/sun6i-dma.txt
index 3e145c1..9cdcba24d 100644
--- a/Documentation/devicetree/bindings/dma/sun6i-dma.txt
+++ b/Documentation/devicetree/bindings/dma/sun6i-dma.txt
@@ -4,7 +4,7 @@
 
 Required properties:
 
-- compatible:	Must be "allwinner,sun6i-a31-dma"
+- compatible:	Must be "allwinner,sun6i-a31-dma" or "allwinner,sun8i-a23-dma"
 - reg:		Should contain the registers base address and length
 - interrupts:	Should contain a reference to the interrupt used by this device
 - clocks:	Should contain a reference to the parent AHB clock
diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
new file mode 100644
index 0000000..9a55ac3
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
@@ -0,0 +1,26 @@
+Rockchip IOMMU
+==============
+
+A Rockchip DRM iommu translates io virtual addresses to physical addresses for
+its master device.  Each slave device is bound to a single master device, and
+shares its clocks, power domain and irq.
+
+Required properties:
+- compatible      : Should be "rockchip,iommu"
+- reg             : Address space for the configuration registers
+- interrupts      : Interrupt specifier for the IOMMU instance
+- interrupt-names : Interrupt name for the IOMMU instance
+- #iommu-cells    : Should be <0>.  This indicates the iommu is a
+                    "single-master" device, and needs no additional information
+                    to associate with its master device.  See:
+                    Documentation/devicetree/bindings/iommu/iommu.txt
+
+Example:
+
+	vopl_mmu: iommu@ff940300 {
+		compatible = "rockchip,iommu";
+		reg = <0xff940300 0x100>;
+		interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "vopl_mmu";
+		#iommu-cells = <0>;
+	};
diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine/client.txt
similarity index 100%
rename from Documentation/dmaengine.txt
rename to Documentation/dmaengine/client.txt
diff --git a/Documentation/dmatest.txt b/Documentation/dmaengine/dmatest.txt
similarity index 100%
rename from Documentation/dmatest.txt
rename to Documentation/dmaengine/dmatest.txt
diff --git a/Documentation/dmaengine/provider.txt b/Documentation/dmaengine/provider.txt
new file mode 100644
index 0000000..766658c
--- /dev/null
+++ b/Documentation/dmaengine/provider.txt
@@ -0,0 +1,366 @@
+DMAengine controller documentation
+==================================
+
+Hardware Introduction
++++++++++++++++++++++
+
+Most of the Slave DMA controllers have the same general principles of
+operations.
+
+They have a given number of channels to use for the DMA transfers, and
+a given number of requests lines.
+
+Requests and channels are pretty much orthogonal: a channel can be
+used to serve several requests over time. To simplify, channels are
+the entities that will be doing the copy, and requests define which
+endpoints are involved.
+
+The request lines actually correspond to physical lines going from the
+DMA-eligible devices to the controller itself. Whenever the device
+wants to start a transfer, it asserts a DMA request (DRQ) by
+asserting that request line.
+
+A very simple DMA controller would only take into account a single
+parameter: the transfer size. At each clock cycle, it would transfer a
+byte of data from one buffer to another, until the transfer size has
+been reached.
+
+That wouldn't work well in the real world, since slave devices might
+require a specific number of bits to be transferred in a single
+cycle. For example, we may want to transfer as much data as the
+physical bus allows to maximize performance when doing a simple
+memory copy operation, but our audio device could have a narrower FIFO
+that requires data to be written exactly 16 or 24 bits at a time. This
+is why most if not all of the DMA controllers can adjust this, using a
+parameter called the transfer width.
+
+Moreover, some DMA controllers, whenever the RAM is used as a source
+or destination, can group the reads or writes in memory into a buffer,
+so instead of having a lot of small memory accesses, which is not
+really efficient, you'll get several bigger transfers. This is done
+using a parameter called the burst size, that defines how many single
+reads/writes it's allowed to do without the controller splitting the
+transfer into smaller sub-transfers.
+
+Our theoretical DMA controller would then only be able to do transfers
+that involve a single contiguous block of data. However, some of the
+transfers we usually need are not contiguous, and require copying data from
+non-contiguous buffers to a contiguous buffer, which is called
+scatter-gather.
+
+DMAEngine, at least for mem2dev transfers, requires support for
+scatter-gather. So we're left with two cases here: either we have a
+quite simple DMA controller that doesn't support it, and we'll have to
+implement it in software, or we have a more advanced DMA controller
+that implements scatter-gather in hardware.
+
+The latter are usually programmed using a collection of chunks to
+transfer, and whenever the transfer is started, the controller will go
+over that collection, doing whatever we programmed there.
+
+This collection is usually either a table or a linked list. You will
+then push either the address of the table and its number of elements,
+or the first item of the list to one channel of the DMA controller,
+and whenever a DRQ will be asserted, it will go through the collection
+to know where to fetch the data from.
+
+Either way, the format of this collection is completely dependent on
+your hardware. Each DMA controller will require a different structure,
+but all of them will require, for every chunk, at least the source and
+destination addresses, whether it should increment these addresses or
+not and the three parameters we saw earlier: the burst size, the
+transfer width and the transfer size.
+
+One last thing: usually, slave devices won't issue a DRQ by default,
+and you have to enable this in your slave device driver first
+whenever you want to use DMA.
+
+These were just the general memory-to-memory (also called mem2mem) or
+memory-to-device (mem2dev) kinds of transfers. Most devices also
+support other kinds of transfers or memory operations that dmaengine
+supports; these will be detailed later in this document.
+
+DMA Support in Linux
+++++++++++++++++++++
+
+Historically, DMA controller drivers have been implemented using the
+async TX API, to offload operations such as memory copy, XOR,
+cryptography, etc., basically any memory to memory operation.
+
+Over time, the need for memory to device transfers arose, and
+dmaengine was extended. Nowadays, the async TX API is written as a
+layer on top of dmaengine, and acts as a client. Still, dmaengine
+accommodates that API in some cases, and made some design choices to
+ensure that it stayed compatible.
+
+For more information on the Async TX API, please look at the relevant
+documentation file in Documentation/crypto/async-tx-api.txt.
+
+DMAEngine Registration
+++++++++++++++++++++++
+
+struct dma_device Initialization
+--------------------------------
+
+Just like any other kernel framework, the whole DMAEngine registration
+relies on the driver filling a structure and registering against the
+framework. In our case, that structure is dma_device.
+
+The first thing you need to do in your driver is to allocate this
+structure. Any of the usual memory allocators will do, but you'll also
+need to initialize a few fields in there:
+
+  * channels:	should be initialized as a list using the
+		INIT_LIST_HEAD macro for example
+
+  * dev: 	should hold the pointer to the struct device associated
+		to your current driver instance.
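+
+For instance, a probe routine could start along these lines; the
+foo_dma_dev structure (embedding a struct dma_device as "ddev") and the
+use of devm_kzalloc() are just one possible sketch, not a requirement:
+
+	struct foo_dma_dev *fd;
+
+	/* foo_dma_dev is a made-up driver structure wrapping dma_device */
+	fd = devm_kzalloc(&pdev->dev, sizeof(*fd), GFP_KERNEL);
+	if (!fd)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&fd->ddev.channels);
+	fd->ddev.dev = &pdev->dev;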
+
+Supported transaction types
+---------------------------
+
+The next thing you need to do is set which transaction types your
+device (and driver) supports.
+
+Our dma_device structure has a field called cap_mask that holds the
+various types of transaction supported, and you need to modify this
+mask using the dma_cap_set function, passing each transaction type
+you support as an argument.
+
+All those capabilities are defined in the dma_transaction_type enum,
+in include/linux/dmaengine.h
+
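+For example, continuing the hypothetical foo_dma_dev sketch above, a
+driver supporting slave and memcpy transfers would do:
+
+	dma_cap_zero(fd->ddev.cap_mask);
+	dma_cap_set(DMA_SLAVE, fd->ddev.cap_mask);
+	dma_cap_set(DMA_MEMCPY, fd->ddev.cap_mask);
+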
+Currently, the types available are:
+  * DMA_MEMCPY
+    - The device is able to do memory to memory copies
+
+  * DMA_XOR
+    - The device is able to perform XOR operations on memory areas
+    - Used to accelerate XOR intensive tasks, such as RAID5
+
+  * DMA_XOR_VAL
+    - The device is able to perform parity check using the XOR
+      algorithm against a memory buffer.
+
+  * DMA_PQ
+    - The device is able to perform RAID6 P+Q computations, P being a
+      simple XOR, and Q being a Reed-Solomon algorithm.
+
+  * DMA_PQ_VAL
+    - The device is able to perform parity check using RAID6 P+Q
+      algorithm against a memory buffer.
+
+  * DMA_INTERRUPT
+    - The device is able to trigger a dummy transfer that will
+      generate periodic interrupts
+    - Used by the client drivers to register a callback that will be
+      called on a regular basis through the DMA controller interrupt
+
+  * DMA_SG
+    - The device supports memory to memory scatter-gather
+      transfers.
+    - Even though a plain memcpy can look like a particular case of a
+      scatter-gather transfer, with a single chunk to transfer, it's a
+      distinct transaction type in the mem2mem transfers case
+
+  * DMA_PRIVATE
+    - The device only supports slave transfers, and as such isn't
+      available for async transfers.
+
+  * DMA_ASYNC_TX
+    - Must not be set by the device, and will be set by the framework
+      if needed
+    - /* TODO: What is it about? */
+
+  * DMA_SLAVE
+    - The device can handle device to memory transfers, including
+      scatter-gather transfers.
+    - While in the mem2mem case we had two distinct types to
+      deal with a single chunk to copy or a collection of them, here,
+      we just have a single transaction type that is supposed to
+      handle both.
+    - If you want to transfer a single contiguous memory buffer,
+      simply build a scatter list with only one item.
+
+  * DMA_CYCLIC
+    - The device can handle cyclic transfers.
+    - A cyclic transfer is a transfer where the chunk collection will
+      loop over itself, with the last item pointing to the first.
+    - It's usually used for audio transfers, where you want to operate
+      on a single ring buffer that you will fill with your audio data.
+
+  * DMA_INTERLEAVE
+    - The device supports interleaved transfer.
+    - These transfers can transfer data from a non-contiguous buffer
+      to a non-contiguous buffer, as opposed to DMA_SLAVE, which can
+      transfer data from a non-contiguous data set to a contiguous
+      destination buffer.
+    - It's usually used for 2d content transfers, in which case you
+      want to transfer a portion of uncompressed data directly to the
+      display to print it.
+
+These various types will also affect how the source and destination
+addresses change over time.
+
+Addresses pointing to RAM are typically incremented (or decremented)
+after each transfer. In case of a ring buffer, they may loop
+(DMA_CYCLIC). Addresses pointing to a device's register (e.g. a FIFO)
+are typically fixed.
+
+Device operations
+-----------------
+
+Our dma_device structure also requires a few function pointers in
+order to implement the actual logic, now that we have described the
+operations we are able to perform.
+
+The functions that we have to fill in there, and hence have to
+implement, obviously depend on the transaction types you reported as
+supported.
+
+   * device_alloc_chan_resources
+   * device_free_chan_resources
+     - These functions will be called whenever a driver calls
+       dma_request_channel or dma_release_channel for the first/last
+       time on the channel associated with that driver.
+     - They are in charge of allocating/freeing all the needed
+       resources in order for that channel to be useful for your
+       driver.
+     - These functions can sleep.
+
+   * device_prep_dma_*
+     - These functions are matching the capabilities you registered
+       previously.
+     - These functions all take the buffer or the scatterlist relevant
+       for the transfer being prepared, and should create a hardware
+       descriptor or a list of hardware descriptors from it
+     - These functions can be called from an interrupt context
+     - Any allocation you might do should be using the GFP_NOWAIT
+       flag, in order not to potentially sleep, but without depleting
+       the emergency pool either.
+     - Drivers should try to pre-allocate any memory they might need
+       during the transfer setup at probe time to avoid putting too
+       much pressure on the nowait allocator.
+
+     - It should return a unique instance of the
+       dma_async_tx_descriptor structure, that further represents this
+       particular transfer.
+
+     - This structure can be initialized using the function
+       dma_async_tx_descriptor_init.
+     - You'll also need to set two fields in this structure:
+       + flags:
+		TODO: Can it be modified by the driver itself, or
+		should it be always the flags passed in the arguments
+
+       + tx_submit:	A pointer to a function you have to implement,
+			that is supposed to push the current
+			transaction descriptor to a pending queue, waiting
+			for issue_pending to be called.
+
+   * device_issue_pending
+     - Takes the first transaction descriptor in the pending queue,
+       and starts the transfer. Whenever that transfer is done, it
+       should move to the next transaction in the list.
+     - This function can be called in an interrupt context
+
+   * device_tx_status
+     - Should report the bytes left to go over on the given channel
+     - Should only care about the transaction descriptor passed as
+       argument, not the currently active one on a given channel
+     - The tx_state argument might be NULL
+     - Should use dma_set_residue to report it (a short sketch follows
+       this list of operations)
+     - In the case of a cyclic transfer, it should only take into
+       account the current period.
+     - This function can be called in an interrupt context.
+
+   * device_control
+     - Used by client drivers to control and configure the channel it
+       has a handle on.
+     - Called with a command and an argument
+       + The command is one of the values listed by the enum
+         dma_ctrl_cmd. The valid commands are:
+         + DMA_PAUSE
+           + Pauses a transfer on the channel
+           + This command should operate synchronously on the channel,
+             pausing right away the work of the given channel
+         + DMA_RESUME
+           + Restarts a transfer on the channel
+           + This command should operate synchronously on the channel,
+             resuming right away the work of the given channel
+         + DMA_TERMINATE_ALL
+           + Aborts all the pending and ongoing transfers on the
+             channel
+           + This command should operate synchronously on the channel,
+             terminating them right away
+         + DMA_SLAVE_CONFIG
+           + Reconfigures the channel with passed configuration
+           + This command should NOT perform synchronously, or on any
+             currently queued transfers, but only on subsequent ones
+           + In this case, the function will receive a
+             dma_slave_config structure pointer as an argument, that
+             will detail which configuration to use.
+           + Even though that structure contains a direction field,
+             this field is deprecated in favor of the direction
+             argument given to the prep_* functions
+         + FSLDMA_EXTERNAL_START
+           + TODO: Why does that even exist?
+       + The argument is an opaque unsigned long. This actually is a
+         pointer to a struct dma_slave_config that should be used only
+         in the DMA_SLAVE_CONFIG.
+
+  * device_slave_caps
+    - Called through the framework by client drivers in order to have
+      an idea of what are the properties of the channel allocated to
+      them.
+    - Such properties are the buswidth, available directions, etc.
+    - Required for every generic layer doing DMA transfers, such as
+      ASoC.
+
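+As an illustration of the residue reporting described for
+device_tx_status above, such a callback often looks roughly like the
+following sketch; foo_bytes_left() is a made-up helper, while
+dma_cookie_status() and dma_set_residue() are the helpers from
+drivers/dma/dmaengine.h:
+
+static enum dma_status foo_tx_status(struct dma_chan *chan,
+				     dma_cookie_t cookie,
+				     struct dma_tx_state *txstate)
+{
+	enum dma_status ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	/* foo_bytes_left() is hypothetical: read the hardware state */
+	dma_set_residue(txstate, foo_bytes_left(chan, cookie));
+	return ret;
+}
+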
+Misc notes (stuff that should be documented, but don't really know
+where to put them)
+------------------------------------------------------------------
+  * dma_run_dependencies
+    - Should be called at the end of an async TX transfer, and can be
+      ignored in the slave transfers case.
+    - Makes sure that dependent operations are run before marking it
+      as complete.
+
+  * dma_cookie_t
+    - it's a DMA transaction ID that will increment over time.
+    - Not really relevant any more since the introduction of virt-dma
+      that abstracts it away.
+
+  * DMA_CTRL_ACK
+    - Undocumented feature
+    - No one really has an idea of what it's about, besides being
+      related to reusing the DMA transaction descriptors or having
+      additional transactions added to it in the async-tx API
+    - Useless in the case of the slave API
+
+General Design Notes
+--------------------
+
+Most of the DMAEngine drivers you'll see are based on a similar design
+that handles the end of transfer interrupts in the handler, but defers
+most work to a tasklet, including the start of a new transfer whenever
+the previous transfer ended.
+
+This is a rather inefficient design though, because the inter-transfer
+latency will be not only the interrupt latency, but also the
+scheduling latency of the tasklet. This will leave the channel idle
+in between, which will slow down the global transfer rate.
+
+You should avoid this kind of practice, and instead of electing a new
+transfer in your tasklet, move that part to the interrupt handler in
+order to have a shorter idle window (that we can't really avoid
+anyway).
+
+Glossary
+--------
+
+Burst: 		A number of consecutive read or write operations
+		that can be queued to buffers before being flushed to
+		memory.
+Chunk:		A contiguous collection of bursts
+Transfer:	A collection of chunks (be it contiguous or not)
diff --git a/Documentation/email-clients.txt b/Documentation/email-clients.txt
index 9af538be..eede608 100644
--- a/Documentation/email-clients.txt
+++ b/Documentation/email-clients.txt
@@ -77,6 +77,17 @@
 to insert into the message.
 
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Claws Mail (GUI)
+
+Works. Some people use this successfully for patches.
+
+To insert a patch use Message->Insert File (CTRL+i) or an external editor.
+
+If the inserted patch has to be edited in the Claws composition window
+"Auto wrapping" in Configuration->Preferences->Compose->Wrapping should be
+disabled.
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Evolution (GUI)
 
 Some people use this successfully for patches.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index eb8a10e..aae9dd1 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1272,7 +1272,7 @@
 
 
 1.9 Ext4 file system parameters
-------------------------------
+-------------------------------
 
 Information about mounted ext4 file systems can be found in
 /proc/fs/ext4.  Each mounted filesystem will have a directory in
diff --git a/Documentation/input/xpad.txt b/Documentation/input/xpad.txt
index 7cc9a43..d1b23f2 100644
--- a/Documentation/input/xpad.txt
+++ b/Documentation/input/xpad.txt
@@ -1,18 +1,22 @@
-xpad - Linux USB driver for X-Box gamepads
+xpad - Linux USB driver for Xbox compatible controllers
 
-This is the very first release of a driver for X-Box gamepads.
-Basically, this was hacked away in just a few hours, so don't expect
-miracles.
+This driver exposes all first-party and third-party Xbox compatible
+controllers. It has a long history and has enjoyed considerable usage
+as Windows' xinput library caused most PC games to focus on Xbox
+controller compatibility.
 
-In particular, there is currently NO support for the rumble pack.
-You won't find many ff-aware linux applications anyway.
+Due to backwards compatibility all buttons are reported as digital.
+This only affects Original Xbox controllers. All later controller models
+have only digital face buttons.
+
+Rumble is supported on some models of Xbox 360 controllers but not on
+Original Xbox controllers nor on Xbox One controllers. As of this
+writing the Xbox One's rumble protocol has not been reverse engineered,
+but it could be supported in the future.
 
 
 0. Notes
 --------
-
-Driver updated for kernel 2.6.17.11. (Based on a patch for 2.6.11.4.)
-
 The number of buttons/axes reported varies based on 3 things:
 - if you are using a known controller
 - if you are using a known dance pad
@@ -20,12 +24,16 @@
   module configuration for "Map D-PAD to buttons rather than axes for unknown
   pads" (module option dpad_to_buttons)
 
-If you set dpad_to_buttons to 0 and you are using an unknown device (one
-not listed below), the driver will map the directional pad to axes (X/Y),
-if you said N it will map the d-pad to buttons, which is needed for dance
-style games to function correctly.  The default is Y.
+If you set dpad_to_buttons to N and you are using an unknown device
+the driver will map the directional pad to axes (X/Y).
+If you said Y it will map the d-pad to buttons, which is needed for dance
+style games to function correctly. The default is Y.
 
-dpad_to_buttons has no effect for known pads.
+dpad_to_buttons has no effect for known pads. An erroneous commit message
+claimed dpad_to_buttons could be used to force behavior on known devices.
+This is not true. Both dpad_to_buttons and triggers_to_buttons only affect
+unknown controllers.
+
 
 0.1 Normal Controllers
 ----------------------
@@ -80,17 +88,29 @@
 box in the future.
 
 
-1. USB adapter
+1. USB adapters
 --------------
+All generations of Xbox controllers speak USB over the wire.
+- Original Xbox controllers use a proprietary connector and require adapters.
+- Wireless Xbox 360 controllers require an 'Xbox 360 Wireless Gaming Receiver
+  for Windows'
+- Wired Xbox 360 controllers use standard USB connectors.
+- Xbox One controllers can be wireless but speak Wi-Fi Direct and are not
+  yet supported.
+- Xbox One controllers can be wired and use standard Micro-USB connectors.
 
-Before you can actually use the driver, you need to get yourself an
-adapter cable to connect the X-Box controller to your Linux-Box. You
-can buy these online fairly cheap, or build your own.
+
+
+1.1 Original Xbox USB adapters
+------------------------------
+Using this driver with an Original Xbox controller requires an
+adapter cable to break out the proprietary connector's pins to USB.
+You can buy these online fairly cheap, or build your own.
 
 Such a cable is pretty easy to build. The Controller itself is a USB
 compound device (a hub with three ports for two expansion slots and
 the controller device) with the only difference in a nonstandard connector
-(5 pins vs. 4 on standard USB connector).
+(5 pins vs. 4 on standard USB 1.0 connectors).
 
 You just need to solder a USB connector onto the cable and keep the
 yellow wire unconnected. The other pins have the same order on both
@@ -102,26 +122,41 @@
 you can still use the controller with your X-Box, if you have one ;)
 
 
+
 2. Driver Installation
 ----------------------
 
-Once you have the adapter cable and the controller is connected, you need
-to load your USB subsystem and should cat /proc/bus/usb/devices.
-There should be an entry like the one at the end [4].
+Once you have the adapter cable, if needed, and the controller connected,
+the xpad module should be auto-loaded. To confirm, you can cat
+/proc/bus/usb/devices. There should be an entry like the one at the end [4].
 
-Currently (as of version 0.0.6), the following devices are included:
- original Microsoft XBOX controller (US), vendor=0x045e, product=0x0202
- smaller  Microsoft XBOX controller (US), vendor=0x045e, product=0x0289
+
+
+3. Supported Controllers
+------------------------
+For a full list of supported controllers and associated vendor and product
+IDs see the xpad_device[] array[6].
+
+As of the historic version 0.0.6 (2006-10-10) the following devices
+were supported:
+ original Microsoft XBOX controller (US),    vendor=0x045e, product=0x0202
+ smaller  Microsoft XBOX controller (US),    vendor=0x045e, product=0x0289
  original Microsoft XBOX controller (Japan), vendor=0x045e, product=0x0285
- InterAct PowerPad Pro (Germany), vendor=0x05fd, product=0x107a
- RedOctane Xbox Dance Pad (US), vendor=0x0c12, product=0x8809
+ InterAct PowerPad Pro (Germany),            vendor=0x05fd, product=0x107a
+ RedOctane Xbox Dance Pad (US),              vendor=0x0c12, product=0x8809
 
-The driver should work with xbox pads not listed above as well, however
-you will need to do something extra for dance pads to work.
+Unrecognized models of Xbox controllers should function as Generic
+Xbox controllers. Unrecognized Dance Pad controllers require setting
+the module option 'dpad_to_buttons'.
 
-If you have a controller not listed above, see 0.3 - Unknown Controllers
+If you have an unrecognized controller please see 0.3 - Unknown Controllers
 
-If you compiled and installed the driver, test the functionality:
+
+4. Manual Testing
+-----------------
+To test this driver's functionality you may use 'jstest'.
+
+For example:
 > modprobe xpad
 > modprobe joydev
 > jstest /dev/js0
@@ -134,7 +169,8 @@
 It works? Voila, you're done ;)
 
 
-3. Thanks
+
+5. Thanks
 ---------
 
 I have to thank ITO Takayuki for the detailed info on his site
@@ -145,14 +181,14 @@
 the basic functionality.
 
 
-4. References
+
+6. References
 -------------
 
-1. http://euc.jp/periphs/xbox-controller.ja.html (ITO Takayuki)
-2. http://xpad.xbox-scene.com/
-3. http://www.markosweb.com/www/xboxhackz.com/ 
-
-4. /proc/bus/usb/devices - dump from InterAct PowerPad Pro (Germany):
+[1]: http://euc.jp/periphs/xbox-controller.ja.html (ITO Takayuki)
+[2]: http://xpad.xbox-scene.com/
+[3]: http://www.markosweb.com/www/xboxhackz.com/
+[4]: /proc/bus/usb/devices - dump from InterAct PowerPad Pro (Germany):
 
 T:  Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#=  5 Spd=12  MxCh= 0
 D:  Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=32 #Cfgs=  1
@@ -162,7 +198,7 @@
 E:  Ad=81(I) Atr=03(Int.) MxPS=  32 Ivl= 10ms
 E:  Ad=02(O) Atr=03(Int.) MxPS=  32 Ivl= 10ms
 
-5. /proc/bus/usb/devices - dump from Redoctane Xbox Dance Pad (US):
+[5]: /proc/bus/usb/devices - dump from Redoctane Xbox Dance Pad (US):
 
 T:  Bus=01 Lev=02 Prnt=09 Port=00 Cnt=01 Dev#= 10 Spd=12  MxCh= 0
 D:  Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs=  1
@@ -173,7 +209,12 @@
 E:  Ad=82(I) Atr=03(Int.) MxPS=  32 Ivl=4ms
 E:  Ad=02(O) Atr=03(Int.) MxPS=  32 Ivl=4ms
 
--- 
+[6]: http://lxr.free-electrons.com/ident?i=xpad_device
+
+
+
+7. Historic Edits
+-----------------
 Marko Friedemann <mfr@bmx-chemnitz.de>
 2002-07-16
  - original doc
@@ -181,3 +222,5 @@
 Dominic Cerquetti <binary1230@yahoo.com>
 2005-03-19
  - added stuff for dance pads, new d-pad->axes mappings
+
+Later changes may be viewed with 'git log Documentation/input/xpad.txt'
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 6815364..4a337da 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3447,6 +3447,13 @@
 			neutralize any effect of /proc/sys/kernel/sysrq.
 			Useful for debugging.
 
+	tcpmhash_entries= [KNL,NET]
+			Set the number of tcp_metrics_hash slots.
+			Default value is 8192 or 16384 depending on total
+			ram pages. This is used to specify the TCP metrics
+			cache size. See Documentation/networking/ip-sysctl.txt
+			"tcp_no_metrics_save" section for more details.
+
 	tdfx=		[HW,DRM]
 
 	test_suspend=	[SUSPEND][,N]
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index f87241d..1be59a3 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -173,7 +173,7 @@
 have been initialized properly, as userspace will instantly start to look
 for them when this call happens.
 
-When the kobject is removed from the kernel (details on how to do that is
+When the kobject is removed from the kernel (details on how to do that are
 below), the uevent for KOBJ_REMOVE will be automatically created by the
 kobject core, so the caller does not have to worry about doing that by
 hand.
diff --git a/tools/testing/selftests/README.txt b/Documentation/kselftest.txt
similarity index 73%
rename from tools/testing/selftests/README.txt
rename to Documentation/kselftest.txt
index 2660d5f..a87d840 100644
--- a/tools/testing/selftests/README.txt
+++ b/Documentation/kselftest.txt
@@ -15,37 +15,45 @@
 =============================================================
 
 To build the tests:
-
   $ make -C tools/testing/selftests
 
 
 To run the tests:
-
   $ make -C tools/testing/selftests run_tests
 
+To build and run the tests with a single command, use:
+  $ make kselftest
+
 - note that some tests will require root privileges.
 
-To run only tests targeted for a single subsystem: (including
-hotplug targets in limited mode)
 
-  $  make -C tools/testing/selftests TARGETS=cpu-hotplug run_tests
+Running a subset of selftests
+========================================
+You can use the "TARGETS" variable on the make command line to specify
+a single test to run, or a list of tests to run.
 
-See the top-level tools/testing/selftests/Makefile for the list of all possible
-targets.
+To run only tests targeted for a single subsystem:
+  $  make -C tools/testing/selftests TARGETS=ptrace run_tests
+
+You can specify multiple tests to build and run:
+  $  make TARGETS="size timers" kselftest
+
+See the top-level tools/testing/selftests/Makefile for the list of all
+possible targets.
+
 
 Running the full range hotplug selftests
 ========================================
 
-To build the tests:
-
+To build the hotplug tests:
   $ make -C tools/testing/selftests hotplug
 
-To run the tests:
-
+To run the hotplug tests:
   $ make -C tools/testing/selftests run_hotplug
 
 - note that some tests will require root privileges.
 
+
 Contributing new tests
 ======================
 
diff --git a/Documentation/mailbox.txt b/Documentation/mailbox.txt
index 60f43ff..1092ad9 100644
--- a/Documentation/mailbox.txt
+++ b/Documentation/mailbox.txt
@@ -53,7 +53,7 @@
 {
 	struct demo_client *dc = container_of(mbox_client,
 						struct demo_client, cl);
-	if (dc->aysnc) {
+	if (dc->async) {
 		if (is_an_ack(mssg)) {
 			/* An ACK to our last sample sent */
 			return; /* Or do something else here */
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 7ee2ae6..70a09f8 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1633,6 +1633,48 @@
      operations" subsection for information on where to use these.
 
 
+ (*) dma_wmb();
+ (*) dma_rmb();
+
+     These are for use with consistent memory to guarantee the ordering
+     of writes or reads of shared memory accessible to both the CPU and a
+     DMA capable device.
+
+     For example, consider a device driver that shares memory with a device
+     and uses a descriptor status value to indicate if the descriptor belongs
+     to the device or the CPU, and a doorbell to notify it when new
+     descriptors are available:
+
+	if (desc->status != DEVICE_OWN) {
+		/* do not read data until we own descriptor */
+		dma_rmb();
+
+		/* read/modify data */
+		read_data = desc->data;
+		desc->data = write_data;
+
+		/* flush modifications before status update */
+		dma_wmb();
+
+		/* assign ownership */
+		desc->status = DEVICE_OWN;
+
+		/* force memory to sync before notifying device via MMIO */
+		wmb();
+
+		/* notify device of new descriptors */
+		writel(DESC_NOTIFY, doorbell);
+	}
+
+     The dma_rmb() allows us to guarantee the device has released ownership
+     before we read the data from the descriptor, and the dma_wmb() allows
+     us to guarantee the data is written to the descriptor before the device
+     can see it now has ownership.  The wmb() is needed to guarantee that the
+     cache coherent memory writes have completed before attempting a write to
+     the cache incoherent MMIO region.
+
+     See Documentation/DMA-API.txt for more information on consistent memory.
+
 MMIO WRITE BARRIER
 ------------------
 
diff --git a/Documentation/mic/mpssd/Makefile b/Documentation/mic/mpssd/Makefile
index 0f31568..f47fe6b 100644
--- a/Documentation/mic/mpssd/Makefile
+++ b/Documentation/mic/mpssd/Makefile
@@ -1,5 +1,5 @@
 # List of programs to build
-hostprogs-y := mpssd
+hostprogs-$(CONFIG_X86_64) := mpssd
 
 mpssd-objs := mpssd.o sysfs.o
 
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index f32ce54..44fe1d2 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -229,13 +229,13 @@
     - if set, the value of child_count is ignored (but still updated)
 
   unsigned int disable_depth;
-    - used for disabling the helper funcions (they work normally if this is
+    - used for disabling the helper functions (they work normally if this is
       equal to zero); the initial value of it is 1 (i.e. runtime PM is
       initially disabled for all devices)
 
   int runtime_error;
     - if set, there was a fatal error (one of the callbacks returned error code
-      as described in Section 2), so the helper funtions will not work until
+      as described in Section 2), so the helper functions will not work until
       this flag is cleared; this is the error code returned by the failing
       callback
 
@@ -468,6 +468,10 @@
     - set the power.irq_safe flag for the device, causing the runtime-PM
       callbacks to be invoked with interrupts off
 
+  bool pm_runtime_is_irq_safe(struct device *dev);
+    - return true if power.irq_safe flag was set for the device, causing
+      the runtime-PM callbacks to be invoked with interrupts off
+
   void pm_runtime_mark_last_busy(struct device *dev);
     - set the power.last_busy field to the current time
 
@@ -524,7 +528,7 @@
 5. Runtime PM Initialization, Device Probing and Removal
 
 Initially, the runtime PM is disabled for all devices, which means that the
-majority of the runtime PM helper funtions described in Section 4 will return
+majority of the runtime PM helper functions described in Section 4 will return
 -EAGAIN until pm_runtime_enable() is called for the device.
 
 In addition to that, the initial runtime PM status of all devices is
diff --git a/Documentation/power/suspend-and-interrupts.txt b/Documentation/power/suspend-and-interrupts.txt
index 6966364..2f9c5a5 100644
--- a/Documentation/power/suspend-and-interrupts.txt
+++ b/Documentation/power/suspend-and-interrupts.txt
@@ -77,7 +77,7 @@
 in a special way.  Namely, the IRQ remains enabled, by on the first interrupt
 it will be disabled, marked as pending and "suspended" so that it will be
 re-enabled by resume_device_irqs() during the subsequent system resume.  Also
-the PM core is notified about the event which casues the system suspend in
+the PM core is notified about the event which causes the system suspend in
 progress to be aborted (that doesn't have to happen immediately, but at one
 of the points where the suspend thread looks for pending wakeup events).
 
diff --git a/Documentation/power/userland-swsusp.txt b/Documentation/power/userland-swsusp.txt
index 0e87082..bbfcd1bb 100644
--- a/Documentation/power/userland-swsusp.txt
+++ b/Documentation/power/userland-swsusp.txt
@@ -99,7 +99,7 @@
 The device's read() operation can be used to transfer the snapshot image from
 the kernel.  It has the following limitations:
 - you cannot read() more than one virtual memory page at a time
-- read()s across page boundaries are impossible (ie. if ypu read() 1/2 of
+- read()s across page boundaries are impossible (ie. if you read() 1/2 of
 	a page in the previous call, you will only be able to read()
 	_at_ _most_ 1/2 of the page in the next call)
 
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index b64e0af..f2d3a10 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -1,8 +1,8 @@
 
 The intent of this file is to give a brief summary of hugetlbpage support in
 the Linux kernel.  This support is built on top of multiple page size support
-that is provided by most modern architectures.  For example, i386
-architecture supports 4K and 4M (2M in PAE mode) page sizes, ia64
+that is provided by most modern architectures.  For example, x86 CPUs normally
+support 4K and 2M (1G if architecturally supported) page sizes, ia64
 architecture supports multiple page sizes 4K, 8K, 64K, 256K, 1M, 4M, 16M,
 256M and ppc64 supports 4K and 16M.  A TLB is a cache of virtual-to-physical
 translations.  Typically this is a very scarce resource on processor.
diff --git a/MAINTAINERS b/MAINTAINERS
index 68764f6..1f0ef48 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1722,6 +1722,13 @@
 F:	drivers/dma/at_hdmac_regs.h
 F:	include/linux/platform_data/dma-atmel.h
 
+ATMEL XDMA DRIVER
+M:	Ludovic Desroches <ludovic.desroches@atmel.com>
+L:	linux-arm-kernel@lists.infradead.org
+L:	dmaengine@vger.kernel.org
+S:	Supported
+F:	drivers/dma/at_xdmac.c
+
 ATMEL I2C DRIVER
 M:	Ludovic Desroches <ludovic.desroches@atmel.com>
 L:	linux-i2c@vger.kernel.org
@@ -3162,7 +3169,8 @@
 S:	Maintained
 F:	drivers/dma/
 F:	include/linux/dma*
-T:	git git://git.infradead.org/users/vkoul/slave-dma.git (slave-dma)
+F:	Documentation/dmaengine/
+T:	git git://git.infradead.org/users/vkoul/slave-dma.git
 
 DME1737 HARDWARE MONITOR DRIVER
 M:	Juerg Haefliger <juergh@gmail.com>
diff --git a/arch/alpha/include/asm/barrier.h b/arch/alpha/include/asm/barrier.h
index 3832bdb..77516c8 100644
--- a/arch/alpha/include/asm/barrier.h
+++ b/arch/alpha/include/asm/barrier.h
@@ -7,6 +7,57 @@
 #define rmb()	__asm__ __volatile__("mb": : :"memory")
 #define wmb()	__asm__ __volatile__("wmb": : :"memory")
 
+/**
+ * read_barrier_depends - Flush all pending reads that subsequent reads
+ * depend on.
+ *
+ * No data-dependent reads from memory-like regions are ever reordered
+ * over this barrier.  All reads preceding this primitive are guaranteed
+ * to access memory (but not necessarily other CPUs' caches) before any
+ * reads following this primitive that depend on the data returned by
+ * any of the preceding reads.  This primitive is much lighter weight than
+ * rmb() on most CPUs, and is never heavier weight than is
+ * rmb().
+ *
+ * These ordering constraints are respected by both the local CPU
+ * and the compiler.
+ *
+ * Ordering is not guaranteed by anything other than these primitives,
+ * not even by data dependencies.  See the documentation for
+ * memory_barrier() for examples and URLs to more information.
+ *
+ * For example, the following code would force ordering (the initial
+ * value of "a" is zero, "b" is one, and "p" is "&a"):
+ *
+ * <programlisting>
+ *	CPU 0				CPU 1
+ *
+ *	b = 2;
+ *	memory_barrier();
+ *	p = &b;				q = p;
+ *					read_barrier_depends();
+ *					d = *q;
+ * </programlisting>
+ *
+ * because the read of "*q" depends on the read of "p" and these
+ * two reads are separated by a read_barrier_depends().  However,
+ * the following code, with the same initial values for "a" and "b":
+ *
+ * <programlisting>
+ *	CPU 0				CPU 1
+ *
+ *	a = 2;
+ *	memory_barrier();
+ *	b = 3;				y = b;
+ *					read_barrier_depends();
+ *					x = a;
+ * </programlisting>
+ *
+ * does not enforce ordering, since there is no data dependency between
+ * the read of "a" and the read of "b".  Therefore, on some CPUs, such
+ * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
+ * in cases like this where there are no data dependencies.
+ */
 #define read_barrier_depends() __asm__ __volatile__("mb": : :"memory")
 
 #ifdef CONFIG_SMP
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2160091..97d07ed 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -688,7 +688,9 @@
 	select CPU_SA1100
 	select GENERIC_CLOCKEVENTS
 	select HAVE_IDE
+	select IRQ_DOMAIN
 	select ISA
+	select MULTI_IRQ_HANDLER
 	select NEED_MACH_MEMORY_H
 	select SPARSE_IRQ
 	help
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index e57d7e5..7b69c5f 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -282,8 +282,8 @@
 	}
 
 	if (i == 8)
-		printk(KERN_ERR "Danger Will Robinson: failed to "
-			"re-trigger IRQ%d\n", d->irq);
+		pr_err("Danger Will Robinson: failed to re-trigger IRQ%d\n",
+		       d->irq);
 	return i == 8 ? -1 : 0;
 }
 
@@ -384,8 +384,8 @@
 	}
 
 	if (i == 8)
-		printk(KERN_ERR "Danger Will Robinson: failed to "
-			"re-trigger IRQ%d\n", d->irq);
+		pr_err("Danger Will Robinson: failed to re-trigger IRQ%d\n",
+		       d->irq);
 	return i == 8 ? -1 : 0;
 }
 
@@ -740,9 +740,8 @@
 		goto err_unmap;
 	}
 
-	printk(KERN_INFO "SA1111 Microprocessor Companion Chip: "
-		"silicon revision %lx, metal revision %lx\n",
-		(id & SKID_SIREV_MASK)>>4, (id & SKID_MTREV_MASK));
+	pr_info("SA1111 Microprocessor Companion Chip: silicon revision %lx, metal revision %lx\n",
+		(id & SKID_SIREV_MASK) >> 4, id & SKID_MTREV_MASK);
 
 	/*
 	 * We found it.  Wake the chip up, and initialise.
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index c6a3e73..d2f81e6 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -43,10 +43,14 @@
 #define mb()		do { dsb(); outer_sync(); } while (0)
 #define rmb()		dsb()
 #define wmb()		do { dsb(st); outer_sync(); } while (0)
+#define dma_rmb()	dmb(osh)
+#define dma_wmb()	dmb(oshst)
 #else
 #define mb()		barrier()
 #define rmb()		barrier()
 #define wmb()		barrier()
+#define dma_rmb()	barrier()
+#define dma_wmb()	barrier()
 #endif
 
 #ifndef CONFIG_SMP
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 10e78d0..2d46862 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -487,6 +487,16 @@
 int set_memory_x(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
 
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void);
+void set_kernel_text_rw(void);
+void set_kernel_text_ro(void);
+#else
+static inline void set_kernel_text_rw(void) { }
+static inline void set_kernel_text_ro(void) { }
+#endif
+
 void flush_uprobe_xol_access(struct page *page, unsigned long uaddr,
 			     void *kaddr, unsigned long len);
+
 #endif
diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h
index 74124b0..0415eae 100644
--- a/arch/arm/include/asm/fixmap.h
+++ b/arch/arm/include/asm/fixmap.h
@@ -2,27 +2,24 @@
 #define _ASM_FIXMAP_H
 
 #define FIXADDR_START		0xffc00000UL
-#define FIXADDR_TOP		0xffe00000UL
-#define FIXADDR_SIZE		(FIXADDR_TOP - FIXADDR_START)
+#define FIXADDR_END		0xfff00000UL
+#define FIXADDR_TOP		(FIXADDR_END - PAGE_SIZE)
 
-#define FIX_KMAP_NR_PTES	(FIXADDR_SIZE >> PAGE_SHIFT)
+#include <asm/kmap_types.h>
 
-#define __fix_to_virt(x)	(FIXADDR_START + ((x) << PAGE_SHIFT))
-#define __virt_to_fix(x)	(((x) - FIXADDR_START) >> PAGE_SHIFT)
+enum fixed_addresses {
+	FIX_KMAP_BEGIN,
+	FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1,
 
-extern void __this_fixmap_does_not_exist(void);
+	/* Support writing RO kernel text via kprobes, jump labels, etc. */
+	FIX_TEXT_POKE0,
+	FIX_TEXT_POKE1,
 
-static inline unsigned long fix_to_virt(const unsigned int idx)
-{
-	if (idx >= FIX_KMAP_NR_PTES)
-		__this_fixmap_does_not_exist();
-	return __fix_to_virt(idx);
-}
+	__end_of_fixed_addresses
+};
 
-static inline unsigned int virt_to_fix(const unsigned long vaddr)
-{
-	BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
-	return __virt_to_fix(vaddr);
-}
+void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
+
+#include <asm-generic/fixmap.h>
 
 #endif
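
With the generic helpers now pulled in from asm-generic/fixmap.h, a slot such as FIX_TEXT_POKE0 can provide a temporary writable alias of a read-only page.  A rough sketch of the pattern (mirrored by patch_map()/patch_unmap() in arch/arm/kernel/patch.c later in this series); the poke_word() helper is hypothetical and omits locking:

	/* Write one word through a temporary writable alias of a read-only
	 * kernel page.  "addr" points into the kernel text. */
	static void poke_word(void *addr, u32 insn)
	{
		struct page *page = virt_to_page(addr);
		u32 *alias;

		set_fixmap(FIX_TEXT_POKE0, page_to_phys(page));
		alias = (u32 *)(fix_to_virt(FIX_TEXT_POKE0) +
				((unsigned long)addr & ~PAGE_MASK));
		*alias = insn;
		clear_fixmap(FIX_TEXT_POKE0);
	}
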
diff --git a/arch/arm/include/asm/hw_irq.h b/arch/arm/include/asm/hw_irq.h
index a71b417..af79da4 100644
--- a/arch/arm/include/asm/hw_irq.h
+++ b/arch/arm/include/asm/hw_irq.h
@@ -8,6 +8,7 @@
 {
 	extern unsigned long irq_err_count;
 	irq_err_count++;
+	pr_crit("unexpected IRQ trap at vector %02x\n", irq);
 }
 
 void set_irq_flags(unsigned int irq, unsigned int flags);
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h
index d428e38..3446f6a 100644
--- a/arch/arm/include/asm/mcpm.h
+++ b/arch/arm/include/asm/mcpm.h
@@ -219,6 +219,23 @@
 bool __mcpm_outbound_enter_critical(unsigned int this_cpu, unsigned int cluster);
 int __mcpm_cluster_state(unsigned int cluster);
 
+/**
+ * mcpm_sync_init - Initialize the cluster synchronization support
+ *
+ * @power_up_setup: platform-specific function invoked during the very
+ * 		    early CPU/cluster bringup stage.
+ *
+ * This prepares memory used by vlocks and the MCPM state machine used
+ * across CPUs that may have their caches active or inactive. Must be
+ * called only after a successful call to mcpm_platform_register().
+ *
+ * The power_up_setup argument is a pointer to assembly code called when
+ * the MMU and caches are still disabled during boot and no stack space is
+ * available. The affinity level passed to that code corresponds to the
+ * resource that needs to be initialized (e.g. 1 for cluster level, 0 for
+ * CPU level).  Proper exclusion mechanisms are already activated at that
+ * point.
+ */
 int __init mcpm_sync_init(
 	void (*power_up_setup)(unsigned int affinity_level));
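
A hedged sketch of how a platform might call this during init, respecting the requirement above that mcpm_platform_register() succeed first.  my_mcpm_ops and my_power_up_setup are hypothetical platform code, not part of this series.

	extern const struct mcpm_platform_ops my_mcpm_ops;		/* hypothetical */
	extern void my_power_up_setup(unsigned int affinity_level);	/* hypothetical asm stub */

	static int __init my_mcpm_init(void)
	{
		int ret;

		ret = mcpm_platform_register(&my_mcpm_ops);
		if (ret)
			return ret;

		return mcpm_sync_init(my_power_up_setup);
	}
	early_initcall(my_mcpm_init);
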
 
diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
index 209e650..a89b407 100644
--- a/arch/arm/include/asm/percpu.h
+++ b/arch/arm/include/asm/percpu.h
@@ -30,14 +30,14 @@
 static inline unsigned long __my_cpu_offset(void)
 {
 	unsigned long off;
-	register unsigned long *sp asm ("sp");
 
 	/*
 	 * Read TPIDRPRW.
 	 * We want to allow caching the value, so avoid using volatile and
 	 * instead use a fake stack read to hazard against barrier().
 	 */
-	asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off) : "Q" (*sp));
+	asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off)
+		: "Q" (*(const unsigned long *)current_stack_pointer));
 
 	return off;
 }
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 78a7793..19cfab5 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -157,7 +157,15 @@
 static inline void
 pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep)
 {
-	__pmd_populate(pmdp, page_to_phys(ptep), _PAGE_USER_TABLE);
+	extern pmdval_t user_pmd_table;
+	pmdval_t prot;
+
+	if (__LINUX_ARM_ARCH__ >= 6 && !IS_ENABLED(CONFIG_ARM_LPAE))
+		prot = user_pmd_table;
+	else
+		prot = _PAGE_USER_TABLE;
+
+	__pmd_populate(pmdp, page_to_phys(ptep), prot);
 }
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
diff --git a/arch/arm/include/asm/pgtable-2level-hwdef.h b/arch/arm/include/asm/pgtable-2level-hwdef.h
index 5cfba15..5e68278 100644
--- a/arch/arm/include/asm/pgtable-2level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-2level-hwdef.h
@@ -20,12 +20,14 @@
 #define PMD_TYPE_FAULT		(_AT(pmdval_t, 0) << 0)
 #define PMD_TYPE_TABLE		(_AT(pmdval_t, 1) << 0)
 #define PMD_TYPE_SECT		(_AT(pmdval_t, 2) << 0)
+#define PMD_PXNTABLE		(_AT(pmdval_t, 1) << 2)     /* v7 */
 #define PMD_BIT4		(_AT(pmdval_t, 1) << 4)
 #define PMD_DOMAIN(x)		(_AT(pmdval_t, (x)) << 5)
 #define PMD_PROTECTION		(_AT(pmdval_t, 1) << 9)		/* v5 */
 /*
  *   - section
  */
+#define PMD_SECT_PXN    (_AT(pmdval_t, 1) << 0)     /* v7 */
 #define PMD_SECT_BUFFERABLE	(_AT(pmdval_t, 1) << 2)
 #define PMD_SECT_CACHEABLE	(_AT(pmdval_t, 1) << 3)
 #define PMD_SECT_XN		(_AT(pmdval_t, 1) << 4)		/* v6 */
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index 9fd61c7..f8f1cff 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -76,6 +76,7 @@
 #define PTE_EXT_SHARED		(_AT(pteval_t, 3) << 8)		/* SH[1:0], inner shareable */
 #define PTE_EXT_AF		(_AT(pteval_t, 1) << 10)	/* Access Flag */
 #define PTE_EXT_NG		(_AT(pteval_t, 1) << 11)	/* nG */
+#define PTE_EXT_PXN		(_AT(pteval_t, 1) << 53)	/* PXN */
 #define PTE_EXT_XN		(_AT(pteval_t, 1) << 54)	/* XN */
 
 /*
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 3b30062..d5cac54 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -252,17 +252,57 @@
 	set_pte_ext(ptep, pteval, ext);
 }
 
-#define PTE_BIT_FUNC(fn,op) \
-static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
+static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
+{
+	pte_val(pte) &= ~pgprot_val(prot);
+	return pte;
+}
 
-PTE_BIT_FUNC(wrprotect, |= L_PTE_RDONLY);
-PTE_BIT_FUNC(mkwrite,   &= ~L_PTE_RDONLY);
-PTE_BIT_FUNC(mkclean,   &= ~L_PTE_DIRTY);
-PTE_BIT_FUNC(mkdirty,   |= L_PTE_DIRTY);
-PTE_BIT_FUNC(mkold,     &= ~L_PTE_YOUNG);
-PTE_BIT_FUNC(mkyoung,   |= L_PTE_YOUNG);
-PTE_BIT_FUNC(mkexec,   &= ~L_PTE_XN);
-PTE_BIT_FUNC(mknexec,   |= L_PTE_XN);
+static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
+{
+	pte_val(pte) |= pgprot_val(prot);
+	return pte;
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(L_PTE_RDONLY));
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(L_PTE_RDONLY));
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(L_PTE_DIRTY));
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(L_PTE_DIRTY));
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(L_PTE_YOUNG));
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(L_PTE_YOUNG));
+}
+
+static inline pte_t pte_mkexec(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(L_PTE_XN));
+}
+
+static inline pte_t pte_mknexec(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(L_PTE_XN));
+}
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
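
Like the old PTE_BIT_FUNC macros, these helpers are pure value transformations on a pte; nothing is written back until the caller stores the result.  A brief usage sketch (the function and the assumption that ptep points at a valid, held PTE are hypothetical):

	/* Make an existing mapping read-only and clear its young bit. */
	static void make_pte_clean_ro(pte_t *ptep)
	{
		pte_t pte = *ptep;

		pte = pte_mkold(pte_wrprotect(pte));
		set_pte_ext(ptep, pte, 0);
	}
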
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 601264d..51622ba 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -154,9 +154,8 @@
 	return regs->ARM_sp;
 }
 
-#define current_pt_regs(void) ({				\
-	register unsigned long sp asm ("sp");			\
-	(struct pt_regs *)((sp | (THREAD_SIZE - 1)) - 7) - 1;	\
+#define current_pt_regs(void) ({ (struct pt_regs *)			\
+		((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1;	\
 })
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index ce73ab6..d890e41 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -90,14 +90,19 @@
 #define init_stack		(init_thread_union.stack)
 
 /*
+ * how to get the current stack pointer in C
+ */
+register unsigned long current_stack_pointer asm ("sp");
+
+/*
  * how to get the thread information struct from C
  */
 static inline struct thread_info *current_thread_info(void) __attribute_const__;
 
 static inline struct thread_info *current_thread_info(void)
 {
-	register unsigned long sp asm ("sp");
-	return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
+	return (struct thread_info *)
+		(current_stack_pointer & ~(THREAD_SIZE - 1));
 }
 
 #define thread_saved_pc(tsk)	\
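
The new global register variable lets C code read the stack pointer without declaring a local 'register unsigned long sp asm ("sp")' at every call site; return_address.c, stacktrace.c and unwind.c are converted to it later in this series.  A trivial sketch of the idiom (my_thread_info() is hypothetical and simply restates what current_thread_info() above now does):

	static struct thread_info *my_thread_info(void)
	{
		return (struct thread_info *)
			(current_stack_pointer & ~(THREAD_SIZE - 1));
	}
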
diff --git a/arch/arm/include/asm/vfp.h b/arch/arm/include/asm/vfp.h
index f4ab34f..ee5f308 100644
--- a/arch/arm/include/asm/vfp.h
+++ b/arch/arm/include/asm/vfp.h
@@ -22,6 +22,7 @@
 #define FPSID_NODOUBLE		(1<<20)
 #define FPSID_ARCH_BIT		(16)
 #define FPSID_ARCH_MASK		(0xF  << FPSID_ARCH_BIT)
+#define FPSID_CPUID_ARCH_MASK	(0x7F  << FPSID_ARCH_BIT)
 #define FPSID_PART_BIT		(8)
 #define FPSID_PART_MASK		(0xFF << FPSID_PART_BIT)
 #define FPSID_VARIANT_BIT	(4)
@@ -75,6 +76,10 @@
 /* MVFR0 bits */
 #define MVFR0_A_SIMD_BIT	(0)
 #define MVFR0_A_SIMD_MASK	(0xf << MVFR0_A_SIMD_BIT)
+#define MVFR0_SP_BIT		(4)
+#define MVFR0_SP_MASK		(0xf << MVFR0_SP_BIT)
+#define MVFR0_DP_BIT		(8)
+#define MVFR0_DP_MASK		(0xf << MVFR0_DP_BIT)
 
 /* Bit patterns for decoding the packaged operation descriptors */
 #define VFPOPDESC_LENGTH_BIT	(9)
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 8dcbed5..f290ac8 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -47,6 +47,7 @@
 obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
 obj-$(CONFIG_HAVE_ARM_TWD)	+= smp_twd.o
 obj-$(CONFIG_ARM_ARCH_TIMER)	+= arch_timer.o
+obj-$(CONFIG_FUNCTION_TRACER)	+= entry-ftrace.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o insn.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o insn.o
 obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o insn.o patch.o
@@ -67,7 +68,7 @@
 endif
 obj-$(CONFIG_OABI_COMPAT)	+= sys_oabi-compat.o
 obj-$(CONFIG_ARM_THUMBEE)	+= thumbee.o
-obj-$(CONFIG_KGDB)		+= kgdb.o
+obj-$(CONFIG_KGDB)		+= kgdb.o patch.o
 obj-$(CONFIG_ARM_UNWIND)	+= unwind.o
 obj-$(CONFIG_HAVE_TCM)		+= tcm.o
 obj-$(CONFIG_OF)		+= devtree.o
@@ -84,6 +85,7 @@
 obj-$(CONFIG_IWMMXT)		+= iwmmxt.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_regs.o perf_callchain.o
 obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o perf_event_cpu.o
+CFLAGS_pj4-cp0.o		:= -marm
 AFLAGS_iwmmxt.o			:= -Wa,-mcpu=iwmmxt
 obj-$(CONFIG_ARM_CPU_TOPOLOGY)  += topology.o
 
diff --git a/arch/arm/kernel/atags_compat.c b/arch/arm/kernel/atags_compat.c
index 5236ad3..05c28b1 100644
--- a/arch/arm/kernel/atags_compat.c
+++ b/arch/arm/kernel/atags_compat.c
@@ -97,8 +97,7 @@
 	struct tag *tag = taglist;
 
 	if (params->u1.s.page_size != PAGE_SIZE) {
-		printk(KERN_WARNING "Warning: bad configuration page, "
-		       "trying to continue\n");
+		pr_warn("Warning: bad configuration page, trying to continue\n");
 		return;
 	}
 
@@ -109,8 +108,7 @@
 	    params->u1.s.nr_pages != 0x04000 &&
 	    params->u1.s.nr_pages != 0x08000 &&
 	    params->u1.s.nr_pages != 0x10000) {
-		printk(KERN_WARNING "Warning: bad NeTTrom parameters "
-		       "detected, using defaults\n");
+		pr_warn("Warning: bad NeTTrom parameters detected, using defaults\n");
 
 		params->u1.s.nr_pages = 0x1000;	/* 16MB */
 		params->u1.s.ramdisk_size = 0;
diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c
index 528f8af..68c6ae0 100644
--- a/arch/arm/kernel/atags_parse.c
+++ b/arch/arm/kernel/atags_parse.c
@@ -167,8 +167,7 @@
 {
 	for (; t->hdr.size; t = tag_next(t))
 		if (!parse_tag(t))
-			printk(KERN_WARNING
-				"Ignoring unrecognised tag 0x%08x\n",
+			pr_warn("Ignoring unrecognised tag 0x%08x\n",
 				t->hdr.tag);
 }
 
@@ -193,7 +192,7 @@
 	 */
 	for_each_machine_desc(p)
 		if (machine_nr == p->nr) {
-			printk("Machine: %s\n", p->name);
+			pr_info("Machine: %s\n", p->name);
 			mdesc = p;
 			break;
 		}
diff --git a/arch/arm/kernel/atags_proc.c b/arch/arm/kernel/atags_proc.c
index c7ff807..5a33790 100644
--- a/arch/arm/kernel/atags_proc.c
+++ b/arch/arm/kernel/atags_proc.c
@@ -41,7 +41,7 @@
 	size_t size;
 
 	if (tag->hdr.tag != ATAG_CORE) {
-		printk(KERN_INFO "No ATAGs?");
+		pr_info("No ATAGs?");
 		return -EINVAL;
 	}
 
@@ -68,7 +68,7 @@
 
 nomem:
 	kfree(b);
-	printk(KERN_ERR "Exporting ATAGs: not enough memory\n");
+	pr_err("Exporting ATAGs: not enough memory\n");
 
 	return -ENOMEM;
 }
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index daaff73b..a4effd6 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -364,7 +364,7 @@
 	/*
 	 * Report what we did for this bus
 	 */
-	printk(KERN_INFO "PCI: bus%d: Fast back to back transfers %sabled\n",
+	pr_info("PCI: bus%d: Fast back to back transfers %sabled\n",
 		bus->number, (features & PCI_COMMAND_FAST_BACK) ? "en" : "dis");
 }
 EXPORT_SYMBOL(pcibios_fixup_bus);
diff --git a/arch/arm/kernel/dma-isa.c b/arch/arm/kernel/dma-isa.c
index 360bb6d..84363fe 100644
--- a/arch/arm/kernel/dma-isa.c
+++ b/arch/arm/kernel/dma-isa.c
@@ -213,8 +213,8 @@
 		for (chan = 0; chan < 8; chan++) {
 			int ret = isa_dma_add(chan, &isa_dma[chan]);
 			if (ret)
-				printk(KERN_ERR "ISADMA%u: unable to register: %d\n",
-					chan, ret);
+				pr_err("ISADMA%u: unable to register: %d\n",
+				       chan, ret);
 		}
 
 		request_dma(DMA_ISA_CASCADE, "cascade");
diff --git a/arch/arm/kernel/dma.c b/arch/arm/kernel/dma.c
index 7b829d9..e651c4d 100644
--- a/arch/arm/kernel/dma.c
+++ b/arch/arm/kernel/dma.c
@@ -79,7 +79,7 @@
 	return ret;
 
 bad_dma:
-	printk(KERN_ERR "dma: trying to allocate DMA%d\n", chan);
+	pr_err("dma: trying to allocate DMA%d\n", chan);
 	return -EINVAL;
 
 busy:
@@ -100,7 +100,7 @@
 		goto bad_dma;
 
 	if (dma->active) {
-		printk(KERN_ERR "dma%d: freeing active DMA\n", chan);
+		pr_err("dma%d: freeing active DMA\n", chan);
 		dma->d_ops->disable(chan, dma);
 		dma->active = 0;
 	}
@@ -111,11 +111,11 @@
 		return;
 	}
 
-	printk(KERN_ERR "dma%d: trying to free free DMA\n", chan);
+	pr_err("dma%d: trying to free free DMA\n", chan);
 	return;
 
 bad_dma:
-	printk(KERN_ERR "dma: trying to free DMA%d\n", chan);
+	pr_err("dma: trying to free DMA%d\n", chan);
 }
 EXPORT_SYMBOL(free_dma);
 
@@ -126,8 +126,7 @@
 	dma_t *dma = dma_channel(chan);
 
 	if (dma->active)
-		printk(KERN_ERR "dma%d: altering DMA SG while "
-		       "DMA active\n", chan);
+		pr_err("dma%d: altering DMA SG while DMA active\n", chan);
 
 	dma->sg = sg;
 	dma->sgcount = nr_sg;
@@ -144,8 +143,7 @@
 	dma_t *dma = dma_channel(chan);
 
 	if (dma->active)
-		printk(KERN_ERR "dma%d: altering DMA address while "
-		       "DMA active\n", chan);
+		pr_err("dma%d: altering DMA address while DMA active\n", chan);
 
 	dma->sg = NULL;
 	dma->addr = addr;
@@ -162,8 +160,7 @@
 	dma_t *dma = dma_channel(chan);
 
 	if (dma->active)
-		printk(KERN_ERR "dma%d: altering DMA count while "
-		       "DMA active\n", chan);
+		pr_err("dma%d: altering DMA count while DMA active\n", chan);
 
 	dma->sg = NULL;
 	dma->count = count;
@@ -178,8 +175,7 @@
 	dma_t *dma = dma_channel(chan);
 
 	if (dma->active)
-		printk(KERN_ERR "dma%d: altering DMA mode while "
-		       "DMA active\n", chan);
+		pr_err("dma%d: altering DMA mode while DMA active\n", chan);
 
 	dma->dma_mode = mode;
 	dma->invalid = 1;
@@ -202,7 +198,7 @@
 	return;
 
 free_dma:
-	printk(KERN_ERR "dma%d: trying to enable free DMA\n", chan);
+	pr_err("dma%d: trying to enable free DMA\n", chan);
 	BUG();
 }
 EXPORT_SYMBOL(enable_dma);
@@ -223,7 +219,7 @@
 	return;
 
 free_dma:
-	printk(KERN_ERR "dma%d: trying to disable free DMA\n", chan);
+	pr_err("dma%d: trying to disable free DMA\n", chan);
 	BUG();
 }
 EXPORT_SYMBOL(disable_dma);
@@ -240,7 +236,7 @@
 
 void set_dma_page(unsigned int chan, char pagenr)
 {
-	printk(KERN_ERR "dma%d: trying to set_dma_page\n", chan);
+	pr_err("dma%d: trying to set_dma_page\n", chan);
 }
 EXPORT_SYMBOL(set_dma_page);
 
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 6bb09d4..f8ccc21 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -109,241 +109,6 @@
 #undef CALL
 #define CALL(x) .long x
 
-#ifdef CONFIG_FUNCTION_TRACER
-/*
- * When compiling with -pg, gcc inserts a call to the mcount routine at the
- * start of every function.  In mcount, apart from the function's address (in
- * lr), we need to get hold of the function's caller's address.
- *
- * Older GCCs (pre-4.4) inserted a call to a routine called mcount like this:
- *
- *	bl	mcount
- *
- * These versions have the limitation that in order for the mcount routine to
- * be able to determine the function's caller's address, an APCS-style frame
- * pointer (which is set up with something like the code below) is required.
- *
- *	mov     ip, sp
- *	push    {fp, ip, lr, pc}
- *	sub     fp, ip, #4
- *
- * With EABI, these frame pointers are not available unless -mapcs-frame is
- * specified, and if building as Thumb-2, not even then.
- *
- * Newer GCCs (4.4+) solve this problem by introducing a new version of mcount,
- * with call sites like:
- *
- *	push	{lr}
- *	bl	__gnu_mcount_nc
- *
- * With these compilers, frame pointers are not necessary.
- *
- * mcount can be thought of as a function called in the middle of a subroutine
- * call.  As such, it needs to be transparent for both the caller and the
- * callee: the original lr needs to be restored when leaving mcount, and no
- * registers should be clobbered.  (In the __gnu_mcount_nc implementation, we
- * clobber the ip register.  This is OK because the ARM calling convention
- * allows it to be clobbered in subroutines and doesn't use it to hold
- * parameters.)
- *
- * When using dynamic ftrace, we patch out the mcount call by a "mov r0, r0"
- * for the mcount case, and a "pop {lr}" for the __gnu_mcount_nc case (see
- * arch/arm/kernel/ftrace.c).
- */
-
-#ifndef CONFIG_OLD_MCOUNT
-#if (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 4))
-#error Ftrace requires CONFIG_FRAME_POINTER=y with GCC older than 4.4.0.
-#endif
-#endif
-
-.macro mcount_adjust_addr rd, rn
-	bic	\rd, \rn, #1		@ clear the Thumb bit if present
-	sub	\rd, \rd, #MCOUNT_INSN_SIZE
-.endm
-
-.macro __mcount suffix
-	mcount_enter
-	ldr	r0, =ftrace_trace_function
-	ldr	r2, [r0]
-	adr	r0, .Lftrace_stub
-	cmp	r0, r2
-	bne	1f
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	ldr     r1, =ftrace_graph_return
-	ldr     r2, [r1]
-	cmp     r0, r2
-	bne     ftrace_graph_caller\suffix
-
-	ldr     r1, =ftrace_graph_entry
-	ldr     r2, [r1]
-	ldr     r0, =ftrace_graph_entry_stub
-	cmp     r0, r2
-	bne     ftrace_graph_caller\suffix
-#endif
-
-	mcount_exit
-
-1: 	mcount_get_lr	r1			@ lr of instrumented func
-	mcount_adjust_addr	r0, lr		@ instrumented function
-	adr	lr, BSYM(2f)
-	mov	pc, r2
-2:	mcount_exit
-.endm
-
-.macro __ftrace_caller suffix
-	mcount_enter
-
-	mcount_get_lr	r1			@ lr of instrumented func
-	mcount_adjust_addr	r0, lr		@ instrumented function
-
-	.globl ftrace_call\suffix
-ftrace_call\suffix:
-	bl	ftrace_stub
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	.globl ftrace_graph_call\suffix
-ftrace_graph_call\suffix:
-	mov	r0, r0
-#endif
-
-	mcount_exit
-.endm
-
-.macro __ftrace_graph_caller
-	sub	r0, fp, #4		@ &lr of instrumented routine (&parent)
-#ifdef CONFIG_DYNAMIC_FTRACE
-	@ called from __ftrace_caller, saved in mcount_enter
-	ldr	r1, [sp, #16]		@ instrumented routine (func)
-	mcount_adjust_addr	r1, r1
-#else
-	@ called from __mcount, untouched in lr
-	mcount_adjust_addr	r1, lr	@ instrumented routine (func)
-#endif
-	mov	r2, fp			@ frame pointer
-	bl	prepare_ftrace_return
-	mcount_exit
-.endm
-
-#ifdef CONFIG_OLD_MCOUNT
-/*
- * mcount
- */
-
-.macro mcount_enter
-	stmdb	sp!, {r0-r3, lr}
-.endm
-
-.macro mcount_get_lr reg
-	ldr	\reg, [fp, #-4]
-.endm
-
-.macro mcount_exit
-	ldr	lr, [fp, #-4]
-	ldmia	sp!, {r0-r3, pc}
-.endm
-
-ENTRY(mcount)
-#ifdef CONFIG_DYNAMIC_FTRACE
-	stmdb	sp!, {lr}
-	ldr	lr, [fp, #-4]
-	ldmia	sp!, {pc}
-#else
-	__mcount _old
-#endif
-ENDPROC(mcount)
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-ENTRY(ftrace_caller_old)
-	__ftrace_caller _old
-ENDPROC(ftrace_caller_old)
-#endif
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-ENTRY(ftrace_graph_caller_old)
-	__ftrace_graph_caller
-ENDPROC(ftrace_graph_caller_old)
-#endif
-
-.purgem mcount_enter
-.purgem mcount_get_lr
-.purgem mcount_exit
-#endif
-
-/*
- * __gnu_mcount_nc
- */
-
-.macro mcount_enter
-/*
- * This pad compensates for the push {lr} at the call site.  Note that we are
- * unable to unwind through a function which does not otherwise save its lr.
- */
- UNWIND(.pad	#4)
-	stmdb	sp!, {r0-r3, lr}
- UNWIND(.save	{r0-r3, lr})
-.endm
-
-.macro mcount_get_lr reg
-	ldr	\reg, [sp, #20]
-.endm
-
-.macro mcount_exit
-	ldmia	sp!, {r0-r3, ip, lr}
-	ret	ip
-.endm
-
-ENTRY(__gnu_mcount_nc)
-UNWIND(.fnstart)
-#ifdef CONFIG_DYNAMIC_FTRACE
-	mov	ip, lr
-	ldmia	sp!, {lr}
-	ret	ip
-#else
-	__mcount
-#endif
-UNWIND(.fnend)
-ENDPROC(__gnu_mcount_nc)
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-ENTRY(ftrace_caller)
-UNWIND(.fnstart)
-	__ftrace_caller
-UNWIND(.fnend)
-ENDPROC(ftrace_caller)
-#endif
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-ENTRY(ftrace_graph_caller)
-UNWIND(.fnstart)
-	__ftrace_graph_caller
-UNWIND(.fnend)
-ENDPROC(ftrace_graph_caller)
-#endif
-
-.purgem mcount_enter
-.purgem mcount_get_lr
-.purgem mcount_exit
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	.globl return_to_handler
-return_to_handler:
-	stmdb	sp!, {r0-r3}
-	mov	r0, fp			@ frame pointer
-	bl	ftrace_return_to_handler
-	mov	lr, r0			@ r0 has real ret addr
-	ldmia	sp!, {r0-r3}
-	ret	lr
-#endif
-
-ENTRY(ftrace_stub)
-.Lftrace_stub:
-	ret	lr
-ENDPROC(ftrace_stub)
-
-#endif /* CONFIG_FUNCTION_TRACER */
-
 /*=============================================================================
  * SWI handler
  *-----------------------------------------------------------------------------
diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S
new file mode 100644
index 0000000..fe57c73
--- /dev/null
+++ b/arch/arm/kernel/entry-ftrace.S
@@ -0,0 +1,243 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/assembler.h>
+#include <asm/ftrace.h>
+#include <asm/unwind.h>
+
+#include "entry-header.S"
+
+/*
+ * When compiling with -pg, gcc inserts a call to the mcount routine at the
+ * start of every function.  In mcount, apart from the function's address (in
+ * lr), we need to get hold of the function's caller's address.
+ *
+ * Older GCCs (pre-4.4) inserted a call to a routine called mcount like this:
+ *
+ *	bl	mcount
+ *
+ * These versions have the limitation that in order for the mcount routine to
+ * be able to determine the function's caller's address, an APCS-style frame
+ * pointer (which is set up with something like the code below) is required.
+ *
+ *	mov     ip, sp
+ *	push    {fp, ip, lr, pc}
+ *	sub     fp, ip, #4
+ *
+ * With EABI, these frame pointers are not available unless -mapcs-frame is
+ * specified, and if building as Thumb-2, not even then.
+ *
+ * Newer GCCs (4.4+) solve this problem by introducing a new version of mcount,
+ * with call sites like:
+ *
+ *	push	{lr}
+ *	bl	__gnu_mcount_nc
+ *
+ * With these compilers, frame pointers are not necessary.
+ *
+ * mcount can be thought of as a function called in the middle of a subroutine
+ * call.  As such, it needs to be transparent for both the caller and the
+ * callee: the original lr needs to be restored when leaving mcount, and no
+ * registers should be clobbered.  (In the __gnu_mcount_nc implementation, we
+ * clobber the ip register.  This is OK because the ARM calling convention
+ * allows it to be clobbered in subroutines and doesn't use it to hold
+ * parameters.)
+ *
+ * When using dynamic ftrace, we patch out the mcount call by a "mov r0, r0"
+ * for the mcount case, and a "pop {lr}" for the __gnu_mcount_nc case (see
+ * arch/arm/kernel/ftrace.c).
+ */
+
+#ifndef CONFIG_OLD_MCOUNT
+#if (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 4))
+#error Ftrace requires CONFIG_FRAME_POINTER=y with GCC older than 4.4.0.
+#endif
+#endif
+
+.macro mcount_adjust_addr rd, rn
+	bic	\rd, \rn, #1		@ clear the Thumb bit if present
+	sub	\rd, \rd, #MCOUNT_INSN_SIZE
+.endm
+
+.macro __mcount suffix
+	mcount_enter
+	ldr	r0, =ftrace_trace_function
+	ldr	r2, [r0]
+	adr	r0, .Lftrace_stub
+	cmp	r0, r2
+	bne	1f
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	ldr     r1, =ftrace_graph_return
+	ldr     r2, [r1]
+	cmp     r0, r2
+	bne     ftrace_graph_caller\suffix
+
+	ldr     r1, =ftrace_graph_entry
+	ldr     r2, [r1]
+	ldr     r0, =ftrace_graph_entry_stub
+	cmp     r0, r2
+	bne     ftrace_graph_caller\suffix
+#endif
+
+	mcount_exit
+
+1: 	mcount_get_lr	r1			@ lr of instrumented func
+	mcount_adjust_addr	r0, lr		@ instrumented function
+	adr	lr, BSYM(2f)
+	mov	pc, r2
+2:	mcount_exit
+.endm
+
+.macro __ftrace_caller suffix
+	mcount_enter
+
+	mcount_get_lr	r1			@ lr of instrumented func
+	mcount_adjust_addr	r0, lr		@ instrumented function
+
+	.globl ftrace_call\suffix
+ftrace_call\suffix:
+	bl	ftrace_stub
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.globl ftrace_graph_call\suffix
+ftrace_graph_call\suffix:
+	mov	r0, r0
+#endif
+
+	mcount_exit
+.endm
+
+.macro __ftrace_graph_caller
+	sub	r0, fp, #4		@ &lr of instrumented routine (&parent)
+#ifdef CONFIG_DYNAMIC_FTRACE
+	@ called from __ftrace_caller, saved in mcount_enter
+	ldr	r1, [sp, #16]		@ instrumented routine (func)
+	mcount_adjust_addr	r1, r1
+#else
+	@ called from __mcount, untouched in lr
+	mcount_adjust_addr	r1, lr	@ instrumented routine (func)
+#endif
+	mov	r2, fp			@ frame pointer
+	bl	prepare_ftrace_return
+	mcount_exit
+.endm
+
+#ifdef CONFIG_OLD_MCOUNT
+/*
+ * mcount
+ */
+
+.macro mcount_enter
+	stmdb	sp!, {r0-r3, lr}
+.endm
+
+.macro mcount_get_lr reg
+	ldr	\reg, [fp, #-4]
+.endm
+
+.macro mcount_exit
+	ldr	lr, [fp, #-4]
+	ldmia	sp!, {r0-r3, pc}
+.endm
+
+ENTRY(mcount)
+#ifdef CONFIG_DYNAMIC_FTRACE
+	stmdb	sp!, {lr}
+	ldr	lr, [fp, #-4]
+	ldmia	sp!, {pc}
+#else
+	__mcount _old
+#endif
+ENDPROC(mcount)
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ENTRY(ftrace_caller_old)
+	__ftrace_caller _old
+ENDPROC(ftrace_caller_old)
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller_old)
+	__ftrace_graph_caller
+ENDPROC(ftrace_graph_caller_old)
+#endif
+
+.purgem mcount_enter
+.purgem mcount_get_lr
+.purgem mcount_exit
+#endif
+
+/*
+ * __gnu_mcount_nc
+ */
+
+.macro mcount_enter
+/*
+ * This pad compensates for the push {lr} at the call site.  Note that we are
+ * unable to unwind through a function which does not otherwise save its lr.
+ */
+ UNWIND(.pad	#4)
+	stmdb	sp!, {r0-r3, lr}
+ UNWIND(.save	{r0-r3, lr})
+.endm
+
+.macro mcount_get_lr reg
+	ldr	\reg, [sp, #20]
+.endm
+
+.macro mcount_exit
+	ldmia	sp!, {r0-r3, ip, lr}
+	ret	ip
+.endm
+
+ENTRY(__gnu_mcount_nc)
+UNWIND(.fnstart)
+#ifdef CONFIG_DYNAMIC_FTRACE
+	mov	ip, lr
+	ldmia	sp!, {lr}
+	ret	ip
+#else
+	__mcount
+#endif
+UNWIND(.fnend)
+ENDPROC(__gnu_mcount_nc)
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ENTRY(ftrace_caller)
+UNWIND(.fnstart)
+	__ftrace_caller
+UNWIND(.fnend)
+ENDPROC(ftrace_caller)
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+UNWIND(.fnstart)
+	__ftrace_graph_caller
+UNWIND(.fnend)
+ENDPROC(ftrace_graph_caller)
+#endif
+
+.purgem mcount_enter
+.purgem mcount_get_lr
+.purgem mcount_exit
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.globl return_to_handler
+return_to_handler:
+	stmdb	sp!, {r0-r3}
+	mov	r0, fp			@ frame pointer
+	bl	ftrace_return_to_handler
+	mov	lr, r0			@ r0 has real ret addr
+	ldmia	sp!, {r0-r3}
+	ret	lr
+#endif
+
+ENTRY(ftrace_stub)
+.Lftrace_stub:
+	ret	lr
+ENDPROC(ftrace_stub)
diff --git a/arch/arm/kernel/etm.c b/arch/arm/kernel/etm.c
index 131a6ab..8b96972 100644
--- a/arch/arm/kernel/etm.c
+++ b/arch/arm/kernel/etm.c
@@ -213,7 +213,7 @@
 	int length;
 
 	if (!t->etb_regs) {
-		printk(KERN_INFO "No tracing hardware found\n");
+		pr_info("No tracing hardware found\n");
 		return;
 	}
 
@@ -229,11 +229,11 @@
 
 	etb_writel(t, first, ETBR_READADDR);
 
-	printk(KERN_INFO "Trace buffer contents length: %d\n", length);
-	printk(KERN_INFO "--- ETB buffer begin ---\n");
+	pr_info("Trace buffer contents length: %d\n", length);
+	pr_info("--- ETB buffer begin ---\n");
 	for (; length; length--)
 		printk("%08x", cpu_to_be32(etb_readl(t, ETBR_READMEM)));
-	printk(KERN_INFO "\n--- ETB buffer end ---\n");
+	pr_info("\n--- ETB buffer end ---\n");
 
 	/* deassert the overflow bit */
 	etb_writel(t, 1, ETBR_CTRL);
@@ -633,14 +633,14 @@
 
 	retval = amba_driver_register(&etb_driver);
 	if (retval) {
-		printk(KERN_ERR "Failed to register etb\n");
+		pr_err("Failed to register etb\n");
 		return retval;
 	}
 
 	retval = amba_driver_register(&etm_driver);
 	if (retval) {
 		amba_driver_unregister(&etb_driver);
-		printk(KERN_ERR "Failed to probe etm\n");
+		pr_err("Failed to probe etm\n");
 		return retval;
 	}
 
diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
index b37752a..059c3da 100644
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -124,7 +124,7 @@
 void release_fiq(struct fiq_handler *f)
 {
 	if (current_fiq != f) {
-		printk(KERN_ERR "%s FIQ trying to release %s FIQ\n",
+		pr_err("%s FIQ trying to release %s FIQ\n",
 		       f->name, current_fiq->name);
 		dump_stack();
 		return;
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index af9a8a9..b8c75e4 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -15,6 +15,7 @@
 #include <linux/ftrace.h>
 #include <linux/uaccess.h>
 #include <linux/module.h>
+#include <linux/stop_machine.h>
 
 #include <asm/cacheflush.h>
 #include <asm/opcodes.h>
@@ -35,6 +36,22 @@
 
 #define	OLD_NOP		0xe1a00000	/* mov r0, r0 */
 
+static int __ftrace_modify_code(void *data)
+{
+	int *command = data;
+
+	set_kernel_text_rw();
+	ftrace_modify_all_code(*command);
+	set_kernel_text_ro();
+
+	return 0;
+}
+
+void arch_ftrace_update_code(int command)
+{
+	stop_machine(__ftrace_modify_code, &command, NULL);
+}
+
 static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec)
 {
 	return rec->arch.old_mcount ? OLD_NOP : NOP;
@@ -73,6 +90,8 @@
 int ftrace_arch_code_modify_post_process(void)
 {
 	set_all_modules_text_ro();
+	/* Make sure any TLB misses during machine stop are cleared. */
+	flush_tlb_all();
 	return 0;
 }
 
diff --git a/arch/arm/kernel/io.c b/arch/arm/kernel/io.c
index 9203cf8..eedefe0 100644
--- a/arch/arm/kernel/io.c
+++ b/arch/arm/kernel/io.c
@@ -51,6 +51,7 @@
 		from++;
 	}
 }
+EXPORT_SYMBOL(_memcpy_fromio);
 
 /*
  * Copy data from "real" memory space to IO memory space.
@@ -66,6 +67,7 @@
 		to++;
 	}
 }
+EXPORT_SYMBOL(_memcpy_toio);
 
 /*
  * "memset" on IO memory space.
@@ -79,7 +81,4 @@
 		dst++;
 	}
 }
-
-EXPORT_SYMBOL(_memcpy_fromio);
-EXPORT_SYMBOL(_memcpy_toio);
 EXPORT_SYMBOL(_memset_io);
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 7c81ec4..ad857bad 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -31,6 +31,7 @@
 #include <linux/smp.h>
 #include <linux/init.h>
 #include <linux/seq_file.h>
+#include <linux/ratelimit.h>
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <linux/kallsyms.h>
@@ -82,7 +83,7 @@
 	unsigned long clr = 0, set = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
 
 	if (irq >= nr_irqs) {
-		printk(KERN_ERR "Trying to set irq flags for IRQ%d\n", irq);
+		pr_err("Trying to set irq flags for IRQ%d\n", irq);
 		return;
 	}
 
@@ -135,7 +136,6 @@
 #endif
 
 #ifdef CONFIG_HOTPLUG_CPU
-
 static bool migrate_one_irq(struct irq_desc *desc)
 {
 	struct irq_data *d = irq_desc_get_irq_data(desc);
@@ -187,8 +187,8 @@
 		affinity_broken = migrate_one_irq(desc);
 		raw_spin_unlock(&desc->lock);
 
-		if (affinity_broken && printk_ratelimit())
-			pr_warn("IRQ%u no longer affine to CPU%u\n",
+		if (affinity_broken)
+			pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n",
 				i, smp_processor_id());
 	}
 
diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S
index ad58e56..49fadbd 100644
--- a/arch/arm/kernel/iwmmxt.S
+++ b/arch/arm/kernel/iwmmxt.S
@@ -58,6 +58,7 @@
 #define MMX_SIZE		(0x98)
 
 	.text
+	.arm
 
 /*
  * Lazy switching of Concan coprocessor context
@@ -182,6 +183,8 @@
 	tmcr	wCon, r2
 	ret	lr
 
+ENDPROC(iwmmxt_task_enable)
+
 /*
  * Back up Concan regs to save area and disable access to them
  * (mainly for gdb or sleep mode usage)
@@ -232,6 +235,8 @@
 1:	msr	cpsr_c, ip			@ restore interrupt mode
 	ldmfd	sp!, {r4, pc}
 
+ENDPROC(iwmmxt_task_disable)
+
 /*
  * Copy Concan state to given memory address
  *
@@ -268,6 +273,8 @@
 	msr	cpsr_c, ip			@ restore interrupt mode
 	ret	r3
 
+ENDPROC(iwmmxt_task_copy)
+
 /*
  * Restore Concan state from given memory address
  *
@@ -304,6 +311,8 @@
 	msr	cpsr_c, ip			@ restore interrupt mode
 	ret	r3
 
+ENDPROC(iwmmxt_task_restore)
+
 /*
  * Concan handling on task switch
  *
@@ -335,6 +344,8 @@
 	mrc	p15, 0, r1, c2, c0, 0
 	sub	pc, lr, r1, lsr #32		@ cpwait and return
 
+ENDPROC(iwmmxt_task_switch)
+
 /*
  * Remove Concan ownership of given task
  *
@@ -353,6 +364,8 @@
 	msr	cpsr_c, r2			@ restore interrupts
 	ret	lr
 
+ENDPROC(iwmmxt_task_release)
+
 	.data
 concan_owner:
 	.word	0
diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c
index 4ce4f78..afeeb9e 100644
--- a/arch/arm/kernel/jump_label.c
+++ b/arch/arm/kernel/jump_label.c
@@ -19,7 +19,7 @@
 		insn = arm_gen_nop();
 
 	if (is_static)
-		__patch_text(addr, insn);
+		__patch_text_early(addr, insn);
 	else
 		patch_text(addr, insn);
 }
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index a74b53c..07db2f8 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -12,8 +12,12 @@
 #include <linux/irq.h>
 #include <linux/kdebug.h>
 #include <linux/kgdb.h>
+#include <linux/uaccess.h>
+
 #include <asm/traps.h>
 
+#include "patch.h"
+
 struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
 {
 	{ "r0", 4, offsetof(struct pt_regs, ARM_r0)},
@@ -244,6 +248,31 @@
 	unregister_die_notifier(&kgdb_notifier);
 }
 
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+	int err;
+
+	/* patch_text() only supports int-sized breakpoints */
+	BUILD_BUG_ON(sizeof(int) != BREAK_INSTR_SIZE);
+
+	err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+				BREAK_INSTR_SIZE);
+	if (err)
+		return err;
+
+	patch_text((void *)bpt->bpt_addr,
+		   *(unsigned int *)arch_kgdb_ops.gdb_bpt_instr);
+
+	return err;
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+	patch_text((void *)bpt->bpt_addr, *(unsigned int *)bpt->saved_instr);
+
+	return 0;
+}
+
 /*
  * Register our undef instruction hooks with ARM undef core.
  * We register a hook specifically looking for the KGDB break inst
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 8cf0996..de2b085 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -29,6 +29,7 @@
 
 static atomic_t waiting_for_crash_ipi;
 
+static unsigned long dt_mem;
 /*
  * Provide a dummy crash_notes definition while crash dump arrives to arm.
  * This prevents breakage of crash_notes attribute in kernel/ksysfs.c.
@@ -64,7 +65,7 @@
 			return err;
 
 		if (be32_to_cpu(header) == OF_DT_HEADER)
-			kexec_boot_atags = current_segment->mem;
+			dt_mem = current_segment->mem;
 	}
 	return 0;
 }
@@ -126,12 +127,12 @@
 		msecs--;
 	}
 	if (atomic_read(&waiting_for_crash_ipi) > 0)
-		printk(KERN_WARNING "Non-crashing CPUs did not react to IPI\n");
+		pr_warn("Non-crashing CPUs did not react to IPI\n");
 
 	crash_save_cpu(regs, smp_processor_id());
 	machine_kexec_mask_interrupts();
 
-	printk(KERN_INFO "Loading crashdump kernel...\n");
+	pr_info("Loading crashdump kernel...\n");
 }
 
 /*
@@ -163,12 +164,12 @@
 	reboot_code_buffer = page_address(image->control_code_page);
 
 	/* Prepare parameters for reboot_code_buffer*/
+	set_kernel_text_rw();
 	kexec_start_address = image->start;
 	kexec_indirection_page = page_list;
 	kexec_mach_type = machine_arch_type;
-	if (!kexec_boot_atags)
-		kexec_boot_atags = image->start - KEXEC_ARM_ZIMAGE_OFFSET + KEXEC_ARM_ATAGS_OFFSET;
-
+	kexec_boot_atags = dt_mem ?: image->start - KEXEC_ARM_ZIMAGE_OFFSET
+				     + KEXEC_ARM_ATAGS_OFFSET;
 
 	/* copy our kernel relocation code to the control code page */
 	reboot_entry = fncpy(reboot_code_buffer,
@@ -177,7 +178,7 @@
 	reboot_entry_phys = (unsigned long)reboot_entry +
 		(reboot_code_buffer_phys - (unsigned long)reboot_code_buffer);
 
-	printk(KERN_INFO "Bye!\n");
+	pr_info("Bye!\n");
 
 	if (kexec_reinit)
 		kexec_reinit();
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 6a4dffe..bea7db9 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -251,7 +251,7 @@
 #endif
 
 		default:
-			printk(KERN_ERR "%s: unknown relocation: %u\n",
+			pr_err("%s: unknown relocation: %u\n",
 			       module->name, ELF32_R_TYPE(rel->r_info));
 			return -ENOEXEC;
 		}
diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c
index 07314af..5038960 100644
--- a/arch/arm/kernel/patch.c
+++ b/arch/arm/kernel/patch.c
@@ -1,8 +1,11 @@
 #include <linux/kernel.h>
+#include <linux/spinlock.h>
 #include <linux/kprobes.h>
+#include <linux/mm.h>
 #include <linux/stop_machine.h>
 
 #include <asm/cacheflush.h>
+#include <asm/fixmap.h>
 #include <asm/smp_plat.h>
 #include <asm/opcodes.h>
 
@@ -13,21 +16,77 @@
 	unsigned int insn;
 };
 
-void __kprobes __patch_text(void *addr, unsigned int insn)
+static DEFINE_SPINLOCK(patch_lock);
+
+static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
+	__acquires(&patch_lock)
+{
+	unsigned int uintaddr = (uintptr_t) addr;
+	bool module = !core_kernel_text(uintaddr);
+	struct page *page;
+
+	if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX))
+		page = vmalloc_to_page(addr);
+	else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA))
+		page = virt_to_page(addr);
+	else
+		return addr;
+
+	if (flags)
+		spin_lock_irqsave(&patch_lock, *flags);
+	else
+		__acquire(&patch_lock);
+
+	set_fixmap(fixmap, page_to_phys(page));
+
+	return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK));
+}
+
+static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
+	__releases(&patch_lock)
+{
+	clear_fixmap(fixmap);
+
+	if (flags)
+		spin_unlock_irqrestore(&patch_lock, *flags);
+	else
+		__release(&patch_lock);
+}
+
+void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
 {
 	bool thumb2 = IS_ENABLED(CONFIG_THUMB2_KERNEL);
+	unsigned int uintaddr = (uintptr_t) addr;
+	bool twopage = false;
+	unsigned long flags;
+	void *waddr = addr;
 	int size;
 
+	if (remap)
+		waddr = patch_map(addr, FIX_TEXT_POKE0, &flags);
+	else
+		__acquire(&patch_lock);
+
 	if (thumb2 && __opcode_is_thumb16(insn)) {
-		*(u16 *)addr = __opcode_to_mem_thumb16(insn);
+		*(u16 *)waddr = __opcode_to_mem_thumb16(insn);
 		size = sizeof(u16);
-	} else if (thumb2 && ((uintptr_t)addr & 2)) {
+	} else if (thumb2 && (uintaddr & 2)) {
 		u16 first = __opcode_thumb32_first(insn);
 		u16 second = __opcode_thumb32_second(insn);
-		u16 *addrh = addr;
+		u16 *addrh0 = waddr;
+		u16 *addrh1 = waddr + 2;
 
-		addrh[0] = __opcode_to_mem_thumb16(first);
-		addrh[1] = __opcode_to_mem_thumb16(second);
+		twopage = (uintaddr & ~PAGE_MASK) == PAGE_SIZE - 2;
+		if (twopage && remap)
+			addrh1 = patch_map(addr + 2, FIX_TEXT_POKE1, NULL);
+
+		*addrh0 = __opcode_to_mem_thumb16(first);
+		*addrh1 = __opcode_to_mem_thumb16(second);
+
+		if (twopage && addrh1 != addr + 2) {
+			flush_kernel_vmap_range(addrh1, 2);
+			patch_unmap(FIX_TEXT_POKE1, NULL);
+		}
 
 		size = sizeof(u32);
 	} else {
@@ -36,10 +95,16 @@
 		else
 			insn = __opcode_to_mem_arm(insn);
 
-		*(u32 *)addr = insn;
+		*(u32 *)waddr = insn;
 		size = sizeof(u32);
 	}
 
+	if (waddr != addr) {
+		flush_kernel_vmap_range(waddr, twopage ? size / 2 : size);
+		patch_unmap(FIX_TEXT_POKE0, &flags);
+	} else
+		__release(&patch_lock);
+
 	flush_icache_range((uintptr_t)(addr),
 			   (uintptr_t)(addr) + size);
 }
@@ -60,16 +125,5 @@
 		.insn = insn,
 	};
 
-	if (cache_ops_need_broadcast()) {
-		stop_machine(patch_text_stop_machine, &patch, cpu_online_mask);
-	} else {
-		bool straddles_word = IS_ENABLED(CONFIG_THUMB2_KERNEL)
-				      && __opcode_is_thumb32(insn)
-				      && ((uintptr_t)addr & 2);
-
-		if (straddles_word)
-			stop_machine(patch_text_stop_machine, &patch, NULL);
-		else
-			__patch_text(addr, insn);
-	}
+	stop_machine(patch_text_stop_machine, &patch, NULL);
 }
diff --git a/arch/arm/kernel/patch.h b/arch/arm/kernel/patch.h
index b4731f2..77e054c 100644
--- a/arch/arm/kernel/patch.h
+++ b/arch/arm/kernel/patch.h
@@ -2,6 +2,16 @@
 #define _ARM_KERNEL_PATCH_H
 
 void patch_text(void *addr, unsigned int insn);
-void __patch_text(void *addr, unsigned int insn);
+void __patch_text_real(void *addr, unsigned int insn, bool remap);
+
+static inline void __patch_text(void *addr, unsigned int insn)
+{
+	__patch_text_real(addr, insn, true);
+}
+
+static inline void __patch_text_early(void *addr, unsigned int insn)
+{
+	__patch_text_real(addr, insn, false);
+}
 
 #endif
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index fe972a2..fdfa3a7 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -51,8 +51,8 @@
 static const char *processor_modes[] __maybe_unused = {
   "USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" , "UK4_26" , "UK5_26" , "UK6_26" , "UK7_26" ,
   "UK8_26" , "UK9_26" , "UK10_26", "UK11_26", "UK12_26", "UK13_26", "UK14_26", "UK15_26",
-  "USER_32", "FIQ_32" , "IRQ_32" , "SVC_32" , "UK4_32" , "UK5_32" , "UK6_32" , "ABT_32" ,
-  "UK8_32" , "UK9_32" , "UK10_32", "UND_32" , "UK12_32", "UK13_32", "UK14_32", "SYS_32"
+  "USER_32", "FIQ_32" , "IRQ_32" , "SVC_32" , "UK4_32" , "UK5_32" , "MON_32" , "ABT_32" ,
+  "UK8_32" , "UK9_32" , "HYP_32", "UND_32" , "UK12_32", "UK13_32", "UK14_32", "SYS_32"
 };
 
 static const char *isa_modes[] __maybe_unused = {
diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c
index 98ea4b7..24b4a04 100644
--- a/arch/arm/kernel/return_address.c
+++ b/arch/arm/kernel/return_address.c
@@ -39,13 +39,12 @@
 {
 	struct return_address_data data;
 	struct stackframe frame;
-	register unsigned long current_sp asm ("sp");
 
 	data.level = level + 2;
 	data.addr = NULL;
 
 	frame.fp = (unsigned long)__builtin_frame_address(0);
-	frame.sp = current_sp;
+	frame.sp = current_stack_pointer;
 	frame.lr = (unsigned long)__builtin_return_address(0);
 	frame.pc = (unsigned long)return_address;
 
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index c031063..8361652 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -900,6 +900,7 @@
 		mdesc = setup_machine_tags(__atags_pointer, __machine_arch_type);
 	machine_desc = mdesc;
 	machine_name = mdesc->name;
+	dump_stack_set_arch_desc("%s", mdesc->name);
 
 	if (mdesc->reboot_mode != REBOOT_HARD)
 		reboot_mode = mdesc->reboot_mode;
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index bd19834..8aa6f1b 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -592,7 +592,6 @@
 				}
 				syscall = 0;
 			} else if (thread_flags & _TIF_UPROBE) {
-				clear_thread_flag(TIF_UPROBE);
 				uprobe_notify_resume(regs);
 			} else {
 				clear_thread_flag(TIF_NOTIFY_RESUME);
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 13396d3..5e6052e 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -225,7 +225,7 @@
 		pr_err("CPU%u: cpu didn't die\n", cpu);
 		return;
 	}
-	printk(KERN_NOTICE "CPU%u: shutdown\n", cpu);
+	pr_notice("CPU%u: shutdown\n", cpu);
 
 	/*
 	 * platform_cpu_kill() is generally expected to do the powering off
@@ -235,7 +235,7 @@
 	 * the requesting CPU and the dying CPU actually losing power.
 	 */
 	if (!platform_cpu_kill(cpu))
-		printk("CPU%u: unable to kill\n", cpu);
+		pr_err("CPU%u: unable to kill\n", cpu);
 }
 
 /*
@@ -351,7 +351,7 @@
 
 	cpu_init();
 
-	printk("CPU%u: Booted secondary processor\n", cpu);
+	pr_debug("CPU%u: Booted secondary processor\n", cpu);
 
 	preempt_disable();
 	trace_hardirqs_off();
@@ -387,9 +387,6 @@
 
 void __init smp_cpus_done(unsigned int max_cpus)
 {
-	printk(KERN_INFO "SMP: Total of %d processors activated.\n",
-	       num_online_cpus());
-
 	hyp_mode_check();
 }
 
@@ -521,7 +518,7 @@
 	if (system_state == SYSTEM_BOOTING ||
 	    system_state == SYSTEM_RUNNING) {
 		raw_spin_lock(&stop_lock);
-		printk(KERN_CRIT "CPU%u: stopping\n", cpu);
+		pr_crit("CPU%u: stopping\n", cpu);
 		dump_stack();
 		raw_spin_unlock(&stop_lock);
 	}
@@ -615,8 +612,8 @@
 		break;
 
 	default:
-		printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n",
-		       cpu, ipinr);
+		pr_crit("CPU%u: Unknown IPI message 0x%x\n",
+		        cpu, ipinr);
 		break;
 	}
 
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index 9309021..172c6a05 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -199,7 +199,7 @@
 	 * the timer ticks
 	 */
 	if (twd_timer_rate == 0) {
-		printk(KERN_INFO "Calibrating local timer... ");
+		pr_info("Calibrating local timer... ");
 
 		/* Wait for a tick to start */
 		waitjiffies = get_jiffies_64() + 1;
@@ -223,7 +223,7 @@
 
 		twd_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5);
 
-		printk("%lu.%02luMHz.\n", twd_timer_rate / 1000000,
+		pr_cont("%lu.%02luMHz.\n", twd_timer_rate / 1000000,
 			(twd_timer_rate / 10000) % 100);
 	}
 }
diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c
index f065eb05..92b7237 100644
--- a/arch/arm/kernel/stacktrace.c
+++ b/arch/arm/kernel/stacktrace.c
@@ -134,12 +134,10 @@
 		frame.pc = thread_saved_pc(tsk);
 #endif
 	} else {
-		register unsigned long current_sp asm ("sp");
-
 		/* We don't want this function nor the caller */
 		data.skip += 2;
 		frame.fp = (unsigned long)__builtin_frame_address(0);
-		frame.sp = current_sp;
+		frame.sp = current_stack_pointer;
 		frame.lr = (unsigned long)__builtin_return_address(0);
 		frame.pc = (unsigned long)__save_stack_trace;
 	}
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
index 587fdfe..afdd51e 100644
--- a/arch/arm/kernel/swp_emulate.c
+++ b/arch/arm/kernel/swp_emulate.c
@@ -260,7 +260,7 @@
 		return -ENOMEM;
 #endif /* CONFIG_PROC_FS */
 
-	printk(KERN_NOTICE "Registering SWP/SWPB emulation handler\n");
+	pr_notice("Registering SWP/SWPB emulation handler\n");
 	register_undef_hook(&swp_hook);
 
 	return 0;
diff --git a/arch/arm/kernel/thumbee.c b/arch/arm/kernel/thumbee.c
index 80f0d69..8ff8dbf 100644
--- a/arch/arm/kernel/thumbee.c
+++ b/arch/arm/kernel/thumbee.c
@@ -72,7 +72,7 @@
 	if ((pfr0 & 0x0000f000) != 0x00001000)
 		return 0;
 
-	printk(KERN_INFO "ThumbEE CPU extension supported.\n");
+	pr_info("ThumbEE CPU extension supported.\n");
 	elf_hwcap |= HWCAP_THUMBEE;
 	thread_register_notifier(&thumbee_notifier_block);
 
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 89cfdd6..08b7847 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -165,7 +165,7 @@
 
 	set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);
 
-	printk(KERN_INFO "CPU%u: update cpu_capacity %lu\n",
+	pr_info("CPU%u: update cpu_capacity %lu\n",
 		cpu, arch_scale_cpu_capacity(NULL, cpu));
 }
 
@@ -269,7 +269,7 @@
 
 	update_cpu_capacity(cpuid);
 
-	printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
+	pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
 		cpuid, cpu_topology[cpuid].thread_id,
 		cpu_topology[cpuid].core_id,
 		cpu_topology[cpuid].socket_id, mpidr);
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 9f5d818..788e23f 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -198,14 +198,14 @@
 	}
 
 	if (!fp) {
-		printk("no frame pointer");
+		pr_cont("no frame pointer");
 		ok = 0;
 	} else if (verify_stack(fp)) {
-		printk("invalid frame pointer 0x%08x", fp);
+		pr_cont("invalid frame pointer 0x%08x", fp);
 		ok = 0;
 	} else if (fp < (unsigned long)end_of_stack(tsk))
-		printk("frame pointer underflow");
-	printk("\n");
+		pr_cont("frame pointer underflow");
+	pr_cont("\n");
 
 	if (ok)
 		c_backtrace(fp, mode);
@@ -240,8 +240,8 @@
 	static int die_counter;
 	int ret;
 
-	printk(KERN_EMERG "Internal error: %s: %x [#%d]" S_PREEMPT S_SMP
-	       S_ISA "\n", str, err, ++die_counter);
+	pr_emerg("Internal error: %s: %x [#%d]" S_PREEMPT S_SMP S_ISA "\n",
+	         str, err, ++die_counter);
 
 	/* trap and error numbers are mostly meaningless on ARM */
 	ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, SIGSEGV);
@@ -250,8 +250,8 @@
 
 	print_modules();
 	__show_regs(regs);
-	printk(KERN_EMERG "Process %.*s (pid: %d, stack limit = 0x%p)\n",
-		TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk));
+	pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
+		 TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk));
 
 	if (!user_mode(regs) || in_interrupt()) {
 		dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
@@ -446,7 +446,7 @@
 die_sig:
 #ifdef CONFIG_DEBUG_USER
 	if (user_debug & UDBG_UNDEFINED) {
-		printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n",
+		pr_info("%s (%d): undefined instruction: pc=%p\n",
 			current->comm, task_pid_nr(current), pc);
 		__show_regs(regs);
 		dump_instr(KERN_INFO, regs);
@@ -496,7 +496,7 @@
 {
 	console_verbose();
 
-	printk(KERN_CRIT "Bad mode in %s handler detected\n", handler[reason]);
+	pr_crit("Bad mode in %s handler detected\n", handler[reason]);
 
 	die("Oops - bad mode", regs, 0);
 	local_irq_disable();
@@ -516,7 +516,7 @@
 
 #ifdef CONFIG_DEBUG_USER
 	if (user_debug & UDBG_SYSCALL) {
-		printk(KERN_ERR "[%d] %s: obsolete system call %08x.\n",
+		pr_err("[%d] %s: obsolete system call %08x.\n",
 			task_pid_nr(current), current->comm, n);
 		dump_instr(KERN_ERR, regs);
 	}
@@ -694,7 +694,7 @@
 	 * something catastrophic has happened
 	 */
 	if (user_debug & UDBG_SYSCALL) {
-		printk("[%d] %s: arm syscall %d\n",
+		pr_err("[%d] %s: arm syscall %d\n",
 		       task_pid_nr(current), current->comm, no);
 		dump_instr("", regs);
 		if (user_mode(regs)) {
@@ -753,8 +753,8 @@
 
 void __bad_xchg(volatile void *ptr, int size)
 {
-	printk("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n",
-		__builtin_return_address(0), ptr, size);
+	pr_err("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n",
+	       __builtin_return_address(0), ptr, size);
 	BUG();
 }
 EXPORT_SYMBOL(__bad_xchg);
@@ -771,8 +771,8 @@
 
 #ifdef CONFIG_DEBUG_USER
 	if (user_debug & UDBG_BADABORT) {
-		printk(KERN_ERR "[%d] %s: bad data abort: code %d instr 0x%08lx\n",
-			task_pid_nr(current), current->comm, code, instr);
+		pr_err("[%d] %s: bad data abort: code %d instr 0x%08lx\n",
+		       task_pid_nr(current), current->comm, code, instr);
 		dump_instr(KERN_ERR, regs);
 		show_pte(current->mm, addr);
 	}
@@ -788,29 +788,29 @@
 
 void __readwrite_bug(const char *fn)
 {
-	printk("%s called, but not implemented\n", fn);
+	pr_err("%s called, but not implemented\n", fn);
 	BUG();
 }
 EXPORT_SYMBOL(__readwrite_bug);
 
 void __pte_error(const char *file, int line, pte_t pte)
 {
-	printk("%s:%d: bad pte %08llx.\n", file, line, (long long)pte_val(pte));
+	pr_err("%s:%d: bad pte %08llx.\n", file, line, (long long)pte_val(pte));
 }
 
 void __pmd_error(const char *file, int line, pmd_t pmd)
 {
-	printk("%s:%d: bad pmd %08llx.\n", file, line, (long long)pmd_val(pmd));
+	pr_err("%s:%d: bad pmd %08llx.\n", file, line, (long long)pmd_val(pmd));
 }
 
 void __pgd_error(const char *file, int line, pgd_t pgd)
 {
-	printk("%s:%d: bad pgd %08llx.\n", file, line, (long long)pgd_val(pgd));
+	pr_err("%s:%d: bad pgd %08llx.\n", file, line, (long long)pgd_val(pgd));
 }
 
 asmlinkage void __div0(void)
 {
-	printk("Division by zero in kernel.\n");
+	pr_err("Division by zero in kernel.\n");
 	dump_stack();
 }
 EXPORT_SYMBOL(__div0);
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index cbb85c5..0bee233 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -471,7 +471,6 @@
 void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 {
 	struct stackframe frame;
-	register unsigned long current_sp asm ("sp");
 
 	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
 
@@ -485,7 +484,7 @@
 			frame.pc = regs->ARM_lr;
 	} else if (tsk == current) {
 		frame.fp = (unsigned long)__builtin_frame_address(0);
-		frame.sp = current_sp;
+		frame.sp = current_stack_pointer;
 		frame.lr = (unsigned long)__builtin_return_address(0);
 		frame.pc = (unsigned long)unwind_backtrace;
 	} else {
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 8e95aa47..b31aa73 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -8,6 +8,9 @@
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/page.h>
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+#include <asm/pgtable.h>
+#endif
 	
 #define PROC_INFO							\
 	. = ALIGN(4);							\
@@ -90,6 +93,11 @@
 		_text = .;
 		HEAD_TEXT
 	}
+
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+	. = ALIGN(1<<SECTION_SHIFT);
+#endif
+
 	.text : {			/* Real text segment		*/
 		_stext = .;		/* Text and read-only data	*/
 			__exception_text_start = .;
@@ -112,6 +120,9 @@
 			ARM_CPU_KEEP(PROC_INFO)
 	}
 
+#ifdef CONFIG_DEBUG_RODATA
+	. = ALIGN(1<<SECTION_SHIFT);
+#endif
 	RO_DATA(PAGE_SIZE)
 
 	. = ALIGN(4);
@@ -145,7 +156,11 @@
 	_etext = .;			/* End of text and rodata section */
 
 #ifndef CONFIG_XIP_KERNEL
+# ifdef CONFIG_ARM_KERNMEM_PERMS
+	. = ALIGN(1<<SECTION_SHIFT);
+# else
 	. = ALIGN(PAGE_SIZE);
+# endif
 	__init_begin = .;
 #endif
 	/*
@@ -219,7 +234,11 @@
 	__data_loc = ALIGN(4);		/* location in binary */
 	. = PAGE_OFFSET + TEXT_OFFSET;
 #else
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+	. = ALIGN(1<<SECTION_SHIFT);
+#else
 	. = ALIGN(THREAD_SIZE);
+#endif
 	__init_end = .;
 	__data_loc = .;
 #endif
diff --git a/arch/arm/kernel/xscale-cp0.c b/arch/arm/kernel/xscale-cp0.c
index e42adc6..bdbb8853 100644
--- a/arch/arm/kernel/xscale-cp0.c
+++ b/arch/arm/kernel/xscale-cp0.c
@@ -157,15 +157,14 @@
 
 	if (cpu_has_iwmmxt()) {
 #ifndef CONFIG_IWMMXT
-		printk(KERN_WARNING "CAUTION: XScale iWMMXt coprocessor "
-			"detected, but kernel support is missing.\n");
+		pr_warn("CAUTION: XScale iWMMXt coprocessor detected, but kernel support is missing.\n");
 #else
-		printk(KERN_INFO "XScale iWMMXt coprocessor detected.\n");
+		pr_info("XScale iWMMXt coprocessor detected.\n");
 		elf_hwcap |= HWCAP_IWMMXT;
 		thread_register_notifier(&iwmmxt_notifier_block);
 #endif
 	} else {
-		printk(KERN_INFO "XScale DSP coprocessor detected.\n");
+		pr_info("XScale DSP coprocessor detected.\n");
 		thread_register_notifier(&dsp_notifier_block);
 		cp_access |= 1;
 	}
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index 66a477a..7a235b9 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 /*
  * Prototype:
@@ -77,6 +78,10 @@
 	stmdb	sp!, {r0, r2, r3, \reg1, \reg2}
 	.endm
 
+	.macro usave reg1 reg2
+	UNWIND(	.save {r0, r2, r3, \reg1, \reg2}	)
+	.endm
+
 	.macro exit reg1 reg2
 	add	sp, sp, #8
 	ldmfd	sp!, {r0, \reg1, \reg2}
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 3bc8eb8..652e4d9 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -53,6 +53,12 @@
  *	data as needed by the implementation including this code. Called
  *	upon code entry.
  *
+ * usave reg1 reg2
+ *
+ *	Unwind annotation macro corresponding to the 'enter' macro.
+ *	It tells the unwinder that the provided registers and additional
+ *	data were preserved on the stack by a prior 'enter' macro.
+ *
  * exit reg1 reg2
  *
  *	Restore registers with the values previously saved with the
@@ -67,7 +73,12 @@
  */
 
 
+	UNWIND(	.fnstart			)
 		enter	r4, lr
+	UNWIND(	.fnend				)
+
+	UNWIND(	.fnstart			)
+		usave	r4, lr			  @ in first stmdb block
 
 		subs	r2, r2, #4
 		blt	8f
@@ -79,6 +90,11 @@
 
 1:		subs	r2, r2, #(28)
 		stmfd	sp!, {r5 - r8}
+	UNWIND(	.fnend				)
+
+	UNWIND(	.fnstart			)
+		usave	r4, lr
+	UNWIND(	.save	{r5 - r8}		) @ in second stmfd block
 		blt	5f
 
 	CALGN(	ands	ip, r0, #31		)
@@ -144,7 +160,10 @@
 	CALGN(	bcs	2b			)
 
 7:		ldmfd	sp!, {r5 - r8}
+	UNWIND(	.fnend				) @ end of second stmfd block
 
+	UNWIND(	.fnstart			)
+		usave	r4, lr			  @ still in first stmdb block
 8:		movs	r2, r2, lsl #31
 		ldr1b	r1, r3, ne, abort=21f
 		ldr1b	r1, r4, cs, abort=21f
@@ -173,10 +192,13 @@
 		ldr1w	r1, lr, abort=21f
 		beq	17f
 		bgt	18f
+	UNWIND(	.fnend				)
 
 
 		.macro	forward_copy_shift pull push
 
+	UNWIND(	.fnstart			)
+		usave	r4, lr			  @ still in first stmdb block
 		subs	r2, r2, #28
 		blt	14f
 
@@ -187,7 +209,11 @@
 	CALGN(	bcc	15f			)
 
 11:		stmfd	sp!, {r5 - r9}
+	UNWIND(	.fnend				)
 
+	UNWIND(	.fnstart			)
+		usave	r4, lr
+	UNWIND(	.save	{r5 - r9}		) @ in new second stmfd block
 	PLD(	pld	[r1, #0]		)
 	PLD(	subs	r2, r2, #96		)
 	PLD(	pld	[r1, #28]		)
@@ -221,7 +247,10 @@
 	PLD(	bge	13b			)
 
 		ldmfd	sp!, {r5 - r9}
+	UNWIND(	.fnend				) @ end of the second stmfd block
 
+	UNWIND(	.fnstart			)
+		usave	r4, lr			  @ still in first stmdb block
 14:		ands	ip, r2, #28
 		beq	16f
 
@@ -236,6 +265,7 @@
 
 16:		sub	r1, r1, #(\push / 8)
 		b	8b
+	UNWIND(	.fnend				)
 
 		.endm
 
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index d066df6..a9d3db1 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 /*
  * Prototype:
@@ -80,6 +81,10 @@
 	stmdb	sp!, {r0, r2, r3, \reg1, \reg2}
 	.endm
 
+	.macro usave reg1 reg2
+	UNWIND(	.save {r0, r2, r3, \reg1, \reg2}	)
+	.endm
+
 	.macro exit reg1 reg2
 	add	sp, sp, #8
 	ldmfd	sp!, {r0, \reg1, \reg2}
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index a9b9e22..7797e81 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 #define LDR1W_SHIFT	0
 #define STR1W_SHIFT	0
@@ -48,6 +49,10 @@
 	stmdb sp!, {r0, \reg1, \reg2}
 	.endm
 
+	.macro usave reg1 reg2
+	UNWIND(	.save	{r0, \reg1, \reg2}	)
+	.endm
+
 	.macro exit reg1 reg2
 	ldmfd sp!, {r0, \reg1, \reg2}
 	.endm
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index d1fc0c0..69a9d47 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 		.text
 
@@ -27,12 +28,17 @@
  */
 
 ENTRY(memmove)
+	UNWIND(	.fnstart			)
 
 		subs	ip, r0, r1
 		cmphi	r2, ip
 		bls	memcpy
 
 		stmfd	sp!, {r0, r4, lr}
+	UNWIND(	.fnend				)
+
+	UNWIND(	.fnstart			)
+	UNWIND(	.save	{r0, r4, lr}		) @ in first stmfd block
 		add	r1, r1, r2
 		add	r0, r0, r2
 		subs	r2, r2, #4
@@ -45,6 +51,11 @@
 
 1:		subs	r2, r2, #(28)
 		stmfd	sp!, {r5 - r8}
+	UNWIND(	.fnend				)
+
+	UNWIND(	.fnstart			)
+	UNWIND(	.save	{r0, r4, lr}		)
+	UNWIND(	.save	{r5 - r8}		) @ in second stmfd block
 		blt	5f
 
 	CALGN(	ands	ip, r0, #31		)
@@ -97,6 +108,10 @@
 	CALGN(	bcs	2b			)
 
 7:		ldmfd	sp!, {r5 - r8}
+	UNWIND(	.fnend				) @ end of second stmfd block
+
+	UNWIND(	.fnstart			)
+	UNWIND(	.save	{r0, r4, lr}		) @ still in first stmfd block
 
 8:		movs	r2, r2, lsl #31
 		ldrneb	r3, [r1, #-1]!
@@ -124,10 +139,13 @@
 		ldr	r3, [r1, #0]
 		beq	17f
 		blt	18f
+	UNWIND(	.fnend				)
 
 
 		.macro	backward_copy_shift push pull
 
+	UNWIND(	.fnstart			)
+	UNWIND(	.save	{r0, r4, lr}		) @ still in first stmfd block
 		subs	r2, r2, #28
 		blt	14f
 
@@ -137,6 +155,11 @@
 	CALGN(	bcc	15f			)
 
 11:		stmfd	sp!, {r5 - r9}
+	UNWIND(	.fnend				)
+
+	UNWIND(	.fnstart			)
+	UNWIND(	.save	{r0, r4, lr}		)
+	UNWIND(	.save	{r5 - r9}		) @ in new second stmfd block
 
 	PLD(	pld	[r1, #-4]		)
 	PLD(	subs	r2, r2, #96		)
@@ -171,6 +194,10 @@
 	PLD(	bge	13b			)
 
 		ldmfd	sp!, {r5 - r9}
+	UNWIND(	.fnend				) @ end of the second stmfd block
+
+	UNWIND(	.fnstart			)
+	UNWIND(	.save {r0, r4, lr}		) @ still in first stmfd block
 
 14:		ands	ip, r2, #28
 		beq	16f
@@ -186,6 +213,7 @@
 
 16:		add	r1, r1, #(\pull / 8)
 		b	8b
+	UNWIND(	.fnend				)
 
 		.endm
 
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 671455c..a4ee97b 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -11,11 +11,13 @@
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 	.text
 	.align	5
 
 ENTRY(memset)
+UNWIND( .fnstart         )
 	ands	r3, r0, #3		@ 1 unaligned?
 	mov	ip, r0			@ preserve r0 as return value
 	bne	6f			@ 1
@@ -34,6 +36,9 @@
  * We need 2 extra registers for this loop - use r8 and the LR
  */
 	stmfd	sp!, {r8, lr}
+UNWIND( .fnend              )
+UNWIND( .fnstart            )
+UNWIND( .save {r8, lr}      )
 	mov	r8, r1
 	mov	lr, r1
 
@@ -53,6 +58,7 @@
 	tst	r2, #16
 	stmneia	ip!, {r1, r3, r8, lr}
 	ldmfd	sp!, {r8, lr}
+UNWIND( .fnend              )
 
 #else
 
@@ -62,6 +68,9 @@
  */
 
 	stmfd	sp!, {r4-r8, lr}
+UNWIND( .fnend                 )
+UNWIND( .fnstart               )
+UNWIND( .save {r4-r8, lr}      )
 	mov	r4, r1
 	mov	r5, r1
 	mov	r6, r1
@@ -94,9 +103,11 @@
 	tst	r2, #16
 	stmneia	ip!, {r4-r7}
 	ldmfd	sp!, {r4-r8, lr}
+UNWIND( .fnend                 )
 
 #endif
 
+UNWIND( .fnstart            )
 4:	tst	r2, #8
 	stmneia	ip!, {r1, r3}
 	tst	r2, #4
@@ -120,4 +131,5 @@
 	strb	r1, [ip], #1		@ 1
 	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
 	b	1b
+UNWIND( .fnend   )
 ENDPROC(memset)
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
index 385ccb3..0eded95 100644
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
@@ -9,6 +9,7 @@
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 	.text
 	.align	5
@@ -18,6 +19,7 @@
  * mis-aligned by, and r1 is the number of bytes.  If r1 < 4, then we
  * don't bother; we use byte stores instead.
  */
+UNWIND(	.fnstart			)
 1:	subs	r1, r1, #4		@ 1 do we have enough
 	blt	5f			@ 1 bytes to align with?
 	cmp	r3, #2			@ 1
@@ -47,6 +49,9 @@
  * use the LR
  */
 	str	lr, [sp, #-4]!		@ 1
+UNWIND(	.fnend				)
+UNWIND(	.fnstart			)
+UNWIND(	.save 	{lr}			)
 	mov	ip, r2			@ 1
 	mov	lr, r2			@ 1
 
@@ -66,6 +71,7 @@
 	tst	r1, #16			@ 1 16 bytes or more?
 	stmneia	r0!, {r2, r3, ip, lr}	@ 4
 	ldr	lr, [sp], #4		@ 1
+UNWIND(	.fnend				)
 
 #else
 
@@ -75,6 +81,9 @@
  */
 
 	stmfd	sp!, {r4-r7, lr}
+UNWIND(	.fnend		       )
+UNWIND(	.fnstart	       )
+UNWIND(	.save 	{r4-r7, lr}    )
 	mov	r4, r2
 	mov	r5, r2
 	mov	r6, r2
@@ -105,9 +114,11 @@
 	tst	r1, #16
 	stmneia	r0!, {r4-r7}
 	ldmfd	sp!, {r4-r7, lr}
+UNWIND(	.fnend		       )
 
 #endif
 
+UNWIND(	.fnstart			)
 4:	tst	r1, #8			@ 1 8 bytes or more?
 	stmneia	r0!, {r2, r3}		@ 2
 	tst	r1, #4			@ 1 4 bytes or more?
@@ -122,4 +133,5 @@
 	tst	r1, #1			@ 1 a byte left over
 	strneb	r2, [r0], #1		@ 1
 	ret	lr			@ 1
+UNWIND(	.fnend				)
 ENDPROC(__memzero)
diff --git a/arch/arm/mach-sa1100/clock.c b/arch/arm/mach-sa1100/clock.c
index 9fa6a99..03c75a8 100644
--- a/arch/arm/mach-sa1100/clock.c
+++ b/arch/arm/mach-sa1100/clock.c
@@ -15,10 +15,12 @@
 #include <linux/clkdev.h>
 
 #include <mach/hardware.h>
+#include <mach/generic.h>
 
 struct clkops {
 	void			(*enable)(struct clk *);
 	void			(*disable)(struct clk *);
+	unsigned long		(*get_rate)(struct clk *);
 };
 
 struct clk {
@@ -33,13 +35,6 @@
 
 static DEFINE_SPINLOCK(clocks_lock);
 
-/* Dummy clk routine to build generic kernel parts that may be using them */
-unsigned long clk_get_rate(struct clk *clk)
-{
-	return 0;
-}
-EXPORT_SYMBOL(clk_get_rate);
-
 static void clk_gpio27_enable(struct clk *clk)
 {
 	/*
@@ -58,6 +53,19 @@
 	GAFR &= ~GPIO_32_768kHz;
 }
 
+static void clk_cpu_enable(struct clk *clk)
+{
+}
+
+static void clk_cpu_disable(struct clk *clk)
+{
+}
+
+static unsigned long clk_cpu_get_rate(struct clk *clk)
+{
+	return sa11x0_getspeed(0) * 1000;
+}
+
 int clk_enable(struct clk *clk)
 {
 	unsigned long flags;
@@ -87,16 +95,37 @@
 }
 EXPORT_SYMBOL(clk_disable);
 
+unsigned long clk_get_rate(struct clk *clk)
+{
+	if (clk && clk->ops && clk->ops->get_rate)
+		return clk->ops->get_rate(clk);
+
+	return 0;
+}
+EXPORT_SYMBOL(clk_get_rate);
+
 const struct clkops clk_gpio27_ops = {
 	.enable		= clk_gpio27_enable,
 	.disable	= clk_gpio27_disable,
 };
 
+const struct clkops clk_cpu_ops = {
+	.enable		= clk_cpu_enable,
+	.disable	= clk_cpu_disable,
+	.get_rate	= clk_cpu_get_rate,
+};
+
 static DEFINE_CLK(gpio27, &clk_gpio27_ops);
 
+static DEFINE_CLK(cpu, &clk_cpu_ops);
+
 static struct clk_lookup sa11xx_clkregs[] = {
 	CLKDEV_INIT("sa1111.0", NULL, &clk_gpio27),
 	CLKDEV_INIT("sa1100-rtc", NULL, NULL),
+	CLKDEV_INIT("sa11x0-fb", NULL, &clk_cpu),
+	CLKDEV_INIT("sa11x0-pcmcia", NULL, &clk_cpu),
+	/* sa1111 names devices using internal offsets, PCMCIA is at 0x1800 */
+	CLKDEV_INIT("1800", NULL, &clk_cpu),
 };
 
 static int __init sa11xx_clk_init(void)
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 108939f..b90c7d8 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -30,7 +30,7 @@
 #include <linux/gpio_keys.h>
 #include <linux/input.h>
 #include <linux/gpio.h>
-#include <linux/pda_power.h>
+#include <linux/power/gpio-charger.h>
 
 #include <video/sa1100fb.h>
 
@@ -131,62 +131,24 @@
 /*
  * Collie AC IN
  */
-static int collie_power_init(struct device *dev)
-{
-	int ret = gpio_request(COLLIE_GPIO_AC_IN, "ac in");
-	if (ret)
-		goto err_gpio_req;
-
-	ret = gpio_direction_input(COLLIE_GPIO_AC_IN);
-	if (ret)
-		goto err_gpio_in;
-
-	return 0;
-
-err_gpio_in:
-	gpio_free(COLLIE_GPIO_AC_IN);
-err_gpio_req:
-	return ret;
-}
-
-static void collie_power_exit(struct device *dev)
-{
-	gpio_free(COLLIE_GPIO_AC_IN);
-}
-
-static int collie_power_ac_online(void)
-{
-	return gpio_get_value(COLLIE_GPIO_AC_IN) == 2;
-}
-
 static char *collie_ac_supplied_to[] = {
 	"main-battery",
 	"backup-battery",
 };
 
-static struct pda_power_pdata collie_power_data = {
-	.init			= collie_power_init,
-	.is_ac_online		= collie_power_ac_online,
-	.exit			= collie_power_exit,
+
+static struct gpio_charger_platform_data collie_power_data = {
+	.name			= "charger",
+	.type			= POWER_SUPPLY_TYPE_MAINS,
+	.gpio			= COLLIE_GPIO_AC_IN,
 	.supplied_to		= collie_ac_supplied_to,
 	.num_supplicants	= ARRAY_SIZE(collie_ac_supplied_to),
 };
 
-static struct resource collie_power_resource[] = {
-	{
-		.name		= "ac",
-		.flags		= IORESOURCE_IRQ |
-				  IORESOURCE_IRQ_HIGHEDGE |
-				  IORESOURCE_IRQ_LOWEDGE,
-	},
-};
-
 static struct platform_device collie_power_device = {
-	.name			= "pda-power",
+	.name			= "gpio-charger",
 	.id			= -1,
 	.dev.platform_data	= &collie_power_data,
-	.resource		= collie_power_resource,
-	.num_resources		= ARRAY_SIZE(collie_power_resource),
 };
 
 #ifdef CONFIG_SHARP_LOCOMO
@@ -420,9 +382,6 @@
 
 	GPSR |= _COLLIE_GPIO_UCB1x00_RESET;
 
-	collie_power_resource[0].start = gpio_to_irq(COLLIE_GPIO_AC_IN);
-	collie_power_resource[0].end = gpio_to_irq(COLLIE_GPIO_AC_IN);
-
 	sa11x0_ppc_configure_mcp();
 
 
diff --git a/arch/arm/mach-sa1100/include/mach/entry-macro.S b/arch/arm/mach-sa1100/include/mach/entry-macro.S
deleted file mode 100644
index 8cf7630bf..0000000
--- a/arch/arm/mach-sa1100/include/mach/entry-macro.S
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * arch/arm/mach-sa1100/include/mach/entry-macro.S
- *
- * Low-level IRQ helper macros for SA1100-based platforms
- *
- * This file is licensed under  the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-		.macro  get_irqnr_preamble, base, tmp
-		mov	\base, #0xfa000000		@ ICIP = 0xfa050000
-		add	\base, \base, #0x00050000
-		.endm
-
-		.macro	get_irqnr_and_base, irqnr, irqstat, base, tmp
-		ldr	\irqstat, [\base]		@ get irqs
-		ldr	\irqnr, [\base, #4]		@ ICMR = 0xfa050004
-		ands	\irqstat, \irqstat, \irqnr
-		mov	\irqnr, #0
-		beq	1001f
-		tst	\irqstat, #0xff
-		moveq	\irqstat, \irqstat, lsr #8
-		addeq	\irqnr, \irqnr, #8
-		tsteq	\irqstat, #0xff
-		moveq	\irqstat, \irqstat, lsr #8
-		addeq	\irqnr, \irqnr, #8
-		tsteq	\irqstat, #0xff
-		moveq	\irqstat, \irqstat, lsr #8
-		addeq	\irqnr, \irqnr, #8
-		tst	\irqstat, #0x0f
-		moveq	\irqstat, \irqstat, lsr #4
-		addeq	\irqnr, \irqnr, #4
-		tst	\irqstat, #0x03
-		moveq	\irqstat, \irqstat, lsr #2
-		addeq	\irqnr, \irqnr, #2
-		tst	\irqstat, #0x01
-		addeqs	\irqnr, \irqnr, #1
-1001:
-		.endm
-
diff --git a/arch/arm/mach-sa1100/include/mach/irqs.h b/arch/arm/mach-sa1100/include/mach/irqs.h
index 3790298..de09834 100644
--- a/arch/arm/mach-sa1100/include/mach/irqs.h
+++ b/arch/arm/mach-sa1100/include/mach/irqs.h
@@ -8,56 +8,56 @@
  * 2001/11/14	RMK	Cleaned up and standardised a lot of the IRQs.
  */
 
-#define	IRQ_GPIO0		0
-#define	IRQ_GPIO1		1
-#define	IRQ_GPIO2		2
-#define	IRQ_GPIO3		3
-#define	IRQ_GPIO4		4
-#define	IRQ_GPIO5		5
-#define	IRQ_GPIO6		6
-#define	IRQ_GPIO7		7
-#define	IRQ_GPIO8		8
-#define	IRQ_GPIO9		9
-#define	IRQ_GPIO10		10
-#define	IRQ_GPIO11_27		11
-#define	IRQ_LCD  		12	/* LCD controller           */
-#define	IRQ_Ser0UDC		13	/* Ser. port 0 UDC          */
-#define	IRQ_Ser1SDLC		14	/* Ser. port 1 SDLC         */
-#define	IRQ_Ser1UART		15	/* Ser. port 1 UART         */
-#define	IRQ_Ser2ICP		16	/* Ser. port 2 ICP          */
-#define	IRQ_Ser3UART		17	/* Ser. port 3 UART         */
-#define	IRQ_Ser4MCP		18	/* Ser. port 4 MCP          */
-#define	IRQ_Ser4SSP		19	/* Ser. port 4 SSP          */
-#define	IRQ_DMA0 		20	/* DMA controller channel 0 */
-#define	IRQ_DMA1 		21	/* DMA controller channel 1 */
-#define	IRQ_DMA2 		22	/* DMA controller channel 2 */
-#define	IRQ_DMA3 		23	/* DMA controller channel 3 */
-#define	IRQ_DMA4 		24	/* DMA controller channel 4 */
-#define	IRQ_DMA5 		25	/* DMA controller channel 5 */
-#define	IRQ_OST0 		26	/* OS Timer match 0         */
-#define	IRQ_OST1 		27	/* OS Timer match 1         */
-#define	IRQ_OST2 		28	/* OS Timer match 2         */
-#define	IRQ_OST3 		29	/* OS Timer match 3         */
-#define	IRQ_RTC1Hz		30	/* RTC 1 Hz clock           */
-#define	IRQ_RTCAlrm		31	/* RTC Alarm                */
+#define	IRQ_GPIO0		1
+#define	IRQ_GPIO1		2
+#define	IRQ_GPIO2		3
+#define	IRQ_GPIO3		4
+#define	IRQ_GPIO4		5
+#define	IRQ_GPIO5		6
+#define	IRQ_GPIO6		7
+#define	IRQ_GPIO7		8
+#define	IRQ_GPIO8		9
+#define	IRQ_GPIO9		10
+#define	IRQ_GPIO10		11
+#define	IRQ_GPIO11_27		12
+#define	IRQ_LCD			13	/* LCD controller           */
+#define	IRQ_Ser0UDC		14	/* Ser. port 0 UDC          */
+#define	IRQ_Ser1SDLC		15	/* Ser. port 1 SDLC         */
+#define	IRQ_Ser1UART		16	/* Ser. port 1 UART         */
+#define	IRQ_Ser2ICP		17	/* Ser. port 2 ICP          */
+#define	IRQ_Ser3UART		18	/* Ser. port 3 UART         */
+#define	IRQ_Ser4MCP		19	/* Ser. port 4 MCP          */
+#define	IRQ_Ser4SSP		20	/* Ser. port 4 SSP          */
+#define	IRQ_DMA0		21	/* DMA controller channel 0 */
+#define	IRQ_DMA1		22	/* DMA controller channel 1 */
+#define	IRQ_DMA2		23	/* DMA controller channel 2 */
+#define	IRQ_DMA3		24	/* DMA controller channel 3 */
+#define	IRQ_DMA4		25	/* DMA controller channel 4 */
+#define	IRQ_DMA5		26	/* DMA controller channel 5 */
+#define	IRQ_OST0		27	/* OS Timer match 0         */
+#define	IRQ_OST1		28	/* OS Timer match 1         */
+#define	IRQ_OST2		29	/* OS Timer match 2         */
+#define	IRQ_OST3		30	/* OS Timer match 3         */
+#define	IRQ_RTC1Hz		31	/* RTC 1 Hz clock           */
+#define	IRQ_RTCAlrm		32	/* RTC Alarm                */
 
-#define	IRQ_GPIO11		32
-#define	IRQ_GPIO12		33
-#define	IRQ_GPIO13		34
-#define	IRQ_GPIO14		35
-#define	IRQ_GPIO15		36
-#define	IRQ_GPIO16		37
-#define	IRQ_GPIO17		38
-#define	IRQ_GPIO18		39
-#define	IRQ_GPIO19		40
-#define	IRQ_GPIO20		41
-#define	IRQ_GPIO21		42
-#define	IRQ_GPIO22		43
-#define	IRQ_GPIO23		44
-#define	IRQ_GPIO24		45
-#define	IRQ_GPIO25		46
-#define	IRQ_GPIO26		47
-#define	IRQ_GPIO27		48
+#define	IRQ_GPIO11		33
+#define	IRQ_GPIO12		34
+#define	IRQ_GPIO13		35
+#define	IRQ_GPIO14		36
+#define	IRQ_GPIO15		37
+#define	IRQ_GPIO16		38
+#define	IRQ_GPIO17		39
+#define	IRQ_GPIO18		40
+#define	IRQ_GPIO19		41
+#define	IRQ_GPIO20		42
+#define	IRQ_GPIO21		43
+#define	IRQ_GPIO22		44
+#define	IRQ_GPIO23		45
+#define	IRQ_GPIO24		46
+#define	IRQ_GPIO25		47
+#define	IRQ_GPIO26		48
+#define	IRQ_GPIO27		49
 
 /*
  * The next 16 interrupts are for board specific purposes.  Since
@@ -65,8 +65,8 @@
  * these.  If you need more, increase IRQ_BOARD_END, but keep it
  * within sensible limits.  IRQs 49 to 64 are available.
  */
-#define IRQ_BOARD_START		49
-#define IRQ_BOARD_END		65
+#define IRQ_BOARD_START		50
+#define IRQ_BOARD_END		66
 
 /*
  * Figure out the MAX IRQ number.
diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c
index 2124f1fc..63e2901 100644
--- a/arch/arm/mach-sa1100/irq.c
+++ b/arch/arm/mach-sa1100/irq.c
@@ -14,17 +14,73 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/ioport.h>
 #include <linux/syscore_ops.h>
 
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 #include <asm/mach/irq.h>
+#include <asm/exception.h>
 
 #include "generic.h"
 
 
 /*
+ * We don't need to ACK IRQs on the SA1100 unless they're GPIOs;
+ * this is for internal IRQs, i.e. from IRQ_LCD to IRQ_RTCAlrm.
+ */
+static void sa1100_mask_irq(struct irq_data *d)
+{
+	ICMR &= ~BIT(d->hwirq);
+}
+
+static void sa1100_unmask_irq(struct irq_data *d)
+{
+	ICMR |= BIT(d->hwirq);
+}
+
+/*
+ * Apart from GPIOs, only the RTC alarm can be a wakeup event.
+ */
+static int sa1100_set_wake(struct irq_data *d, unsigned int on)
+{
+	if (BIT(d->hwirq) == IC_RTCAlrm) {
+		if (on)
+			PWER |= PWER_RTC;
+		else
+			PWER &= ~PWER_RTC;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static struct irq_chip sa1100_normal_chip = {
+	.name		= "SC",
+	.irq_ack	= sa1100_mask_irq,
+	.irq_mask	= sa1100_mask_irq,
+	.irq_unmask	= sa1100_unmask_irq,
+	.irq_set_wake	= sa1100_set_wake,
+};
+
+static int sa1100_normal_irqdomain_map(struct irq_domain *d,
+		unsigned int irq, irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &sa1100_normal_chip,
+				 handle_level_irq);
+	set_irq_flags(irq, IRQF_VALID);
+
+	return 0;
+}
+
+static struct irq_domain_ops sa1100_normal_irqdomain_ops = {
+	.map = sa1100_normal_irqdomain_map,
+	.xlate = irq_domain_xlate_onetwocell,
+};
+
+static struct irq_domain *sa1100_normal_irqdomain;
+
+/*
  * SA1100 GPIO edge detection for IRQs:
  * IRQs are generated on Falling-Edge, Rising-Edge, or both.
  * Use this instead of directly setting GRER/GFER.
@@ -33,20 +89,11 @@
 static int GPIO_IRQ_falling_edge;
 static int GPIO_IRQ_mask = (1 << 11) - 1;
 
-/*
- * To get the GPIO number from an IRQ number
- */
-#define GPIO_11_27_IRQ(i)	((i) - 21)
-#define GPIO11_27_MASK(irq)	(1 << GPIO_11_27_IRQ(irq))
-
 static int sa1100_gpio_type(struct irq_data *d, unsigned int type)
 {
 	unsigned int mask;
 
-	if (d->irq <= 10)
-		mask = 1 << d->irq;
-	else
-		mask = GPIO11_27_MASK(d->irq);
+	mask = BIT(d->hwirq);
 
 	if (type == IRQ_TYPE_PROBE) {
 		if ((GPIO_IRQ_rising_edge | GPIO_IRQ_falling_edge) & mask)
@@ -70,41 +117,51 @@
 }
 
 /*
- * GPIO IRQs must be acknowledged.  This is for IRQs from 0 to 10.
+ * GPIO IRQs must be acknowledged.
  */
-static void sa1100_low_gpio_ack(struct irq_data *d)
+static void sa1100_gpio_ack(struct irq_data *d)
 {
-	GEDR = (1 << d->irq);
+	GEDR = BIT(d->hwirq);
 }
 
-static void sa1100_low_gpio_mask(struct irq_data *d)
-{
-	ICMR &= ~(1 << d->irq);
-}
-
-static void sa1100_low_gpio_unmask(struct irq_data *d)
-{
-	ICMR |= 1 << d->irq;
-}
-
-static int sa1100_low_gpio_wake(struct irq_data *d, unsigned int on)
+static int sa1100_gpio_wake(struct irq_data *d, unsigned int on)
 {
 	if (on)
-		PWER |= 1 << d->irq;
+		PWER |= BIT(d->hwirq);
 	else
-		PWER &= ~(1 << d->irq);
+		PWER &= ~BIT(d->hwirq);
 	return 0;
 }
 
+/*
+ * This is for IRQs from 0 to 10.
+ */
 static struct irq_chip sa1100_low_gpio_chip = {
 	.name		= "GPIO-l",
-	.irq_ack	= sa1100_low_gpio_ack,
-	.irq_mask	= sa1100_low_gpio_mask,
-	.irq_unmask	= sa1100_low_gpio_unmask,
+	.irq_ack	= sa1100_gpio_ack,
+	.irq_mask	= sa1100_mask_irq,
+	.irq_unmask	= sa1100_unmask_irq,
 	.irq_set_type	= sa1100_gpio_type,
-	.irq_set_wake	= sa1100_low_gpio_wake,
+	.irq_set_wake	= sa1100_gpio_wake,
 };
 
+static int sa1100_low_gpio_irqdomain_map(struct irq_domain *d,
+		unsigned int irq, irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &sa1100_low_gpio_chip,
+				 handle_edge_irq);
+	set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+
+	return 0;
+}
+
+static struct irq_domain_ops sa1100_low_gpio_irqdomain_ops = {
+	.map = sa1100_low_gpio_irqdomain_map,
+	.xlate = irq_domain_xlate_onetwocell,
+};
+
+static struct irq_domain *sa1100_low_gpio_irqdomain;
+
 /*
  * IRQ11 (GPIO11 through 27) handler.  We enter here with the
  * irq_controller_lock held, and IRQs disabled.  Decode the IRQ
@@ -141,16 +198,9 @@
  * In addition, the IRQs are all collected up into one bit in the
  * interrupt controller registers.
  */
-static void sa1100_high_gpio_ack(struct irq_data *d)
-{
-	unsigned int mask = GPIO11_27_MASK(d->irq);
-
-	GEDR = mask;
-}
-
 static void sa1100_high_gpio_mask(struct irq_data *d)
 {
-	unsigned int mask = GPIO11_27_MASK(d->irq);
+	unsigned int mask = BIT(d->hwirq);
 
 	GPIO_IRQ_mask &= ~mask;
 
@@ -160,7 +210,7 @@
 
 static void sa1100_high_gpio_unmask(struct irq_data *d)
 {
-	unsigned int mask = GPIO11_27_MASK(d->irq);
+	unsigned int mask = BIT(d->hwirq);
 
 	GPIO_IRQ_mask |= mask;
 
@@ -168,61 +218,32 @@
 	GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask;
 }
 
-static int sa1100_high_gpio_wake(struct irq_data *d, unsigned int on)
-{
-	if (on)
-		PWER |= GPIO11_27_MASK(d->irq);
-	else
-		PWER &= ~GPIO11_27_MASK(d->irq);
-	return 0;
-}
-
 static struct irq_chip sa1100_high_gpio_chip = {
 	.name		= "GPIO-h",
-	.irq_ack	= sa1100_high_gpio_ack,
+	.irq_ack	= sa1100_gpio_ack,
 	.irq_mask	= sa1100_high_gpio_mask,
 	.irq_unmask	= sa1100_high_gpio_unmask,
 	.irq_set_type	= sa1100_gpio_type,
-	.irq_set_wake	= sa1100_high_gpio_wake,
+	.irq_set_wake	= sa1100_gpio_wake,
 };
 
-/*
- * We don't need to ACK IRQs on the SA1100 unless they're GPIOs
- * this is for internal IRQs i.e. from 11 to 31.
- */
-static void sa1100_mask_irq(struct irq_data *d)
+static int sa1100_high_gpio_irqdomain_map(struct irq_domain *d,
+		unsigned int irq, irq_hw_number_t hwirq)
 {
-	ICMR &= ~(1 << d->irq);
+	irq_set_chip_and_handler(irq, &sa1100_high_gpio_chip,
+				 handle_edge_irq);
+	set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+
+	return 0;
 }
 
-static void sa1100_unmask_irq(struct irq_data *d)
-{
-	ICMR |= (1 << d->irq);
-}
-
-/*
- * Apart form GPIOs, only the RTC alarm can be a wakeup event.
- */
-static int sa1100_set_wake(struct irq_data *d, unsigned int on)
-{
-	if (d->irq == IRQ_RTCAlrm) {
-		if (on)
-			PWER |= PWER_RTC;
-		else
-			PWER &= ~PWER_RTC;
-		return 0;
-	}
-	return -EINVAL;
-}
-
-static struct irq_chip sa1100_normal_chip = {
-	.name		= "SC",
-	.irq_ack	= sa1100_mask_irq,
-	.irq_mask	= sa1100_mask_irq,
-	.irq_unmask	= sa1100_unmask_irq,
-	.irq_set_wake	= sa1100_set_wake,
+static struct irq_domain_ops sa1100_high_gpio_irqdomain_ops = {
+	.map = sa1100_high_gpio_irqdomain_map,
+	.xlate = irq_domain_xlate_onetwocell,
 };
 
+static struct irq_domain *sa1100_high_gpio_irqdomain;
+
 static struct resource irq_resource =
 	DEFINE_RES_MEM_NAMED(0x90050000, SZ_64K, "irqs");
 
@@ -291,10 +312,25 @@
 
 device_initcall(sa1100irq_init_devicefs);
 
+static asmlinkage void __exception_irq_entry
+sa1100_handle_irq(struct pt_regs *regs)
+{
+	uint32_t icip, icmr, mask;
+
+	do {
+		icip = (ICIP);
+		icmr = (ICMR);
+		mask = icip & icmr;
+
+		if (mask == 0)
+			break;
+
+		handle_IRQ(ffs(mask) - 1 + IRQ_GPIO0, regs);
+	} while (1);
+}
+
 void __init sa1100_init_irq(void)
 {
-	unsigned int irq;
-
 	request_resource(&iomem_resource, &irq_resource);
 
 	/* disable all IRQs */
@@ -314,29 +350,24 @@
 	 */
 	ICCR = 1;
 
-	for (irq = 0; irq <= 10; irq++) {
-		irq_set_chip_and_handler(irq, &sa1100_low_gpio_chip,
-					 handle_edge_irq);
-		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
-	}
+	sa1100_low_gpio_irqdomain = irq_domain_add_legacy(NULL,
+			11, IRQ_GPIO0, 0,
+			&sa1100_low_gpio_irqdomain_ops, NULL);
 
-	for (irq = 12; irq <= 31; irq++) {
-		irq_set_chip_and_handler(irq, &sa1100_normal_chip,
-					 handle_level_irq);
-		set_irq_flags(irq, IRQF_VALID);
-	}
+	sa1100_normal_irqdomain = irq_domain_add_legacy(NULL,
+			21, IRQ_GPIO11_27, 11,
+			&sa1100_normal_irqdomain_ops, NULL);
 
-	for (irq = 32; irq <= 48; irq++) {
-		irq_set_chip_and_handler(irq, &sa1100_high_gpio_chip,
-					 handle_edge_irq);
-		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
-	}
+	sa1100_high_gpio_irqdomain = irq_domain_add_legacy(NULL,
+			17, IRQ_GPIO11, 11,
+			&sa1100_high_gpio_irqdomain_ops, NULL);
 
 	/*
 	 * Install handler for GPIO 11-27 edge detect interrupts
 	 */
-	irq_set_chip(IRQ_GPIO11_27, &sa1100_normal_chip);
 	irq_set_chained_handler(IRQ_GPIO11_27, sa1100_high_gpio_handler);
 
+	set_handle_irq(sa1100_handle_irq);
+
 	sa1100_init_gpio();
 }
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index ab906b8..03823e7 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -1009,3 +1009,24 @@
 	help
 	  This option specifies the architecture can support big endian
 	  operation.
+
+config ARM_KERNMEM_PERMS
+	bool "Restrict kernel memory permissions"
+	help
+	  If this is set, kernel memory other than kernel text (and rodata)
+	  will be made non-executable. The tradeoff is that each region is
+	  padded to section-size (1MiB) boundaries (because their permissions
+	  are different and splitting the 1M pages into 4K ones causes TLB
+	  performance problems), wasting memory.
+
+config DEBUG_RODATA
+	bool "Make kernel text and rodata read-only"
+	depends on ARM_KERNMEM_PERMS
+	default y
+	help
+	  If this is set, kernel text and rodata will be made read-only. This
+	  is to help catch accidental or malicious attempts to change the
+	  kernel's executable code. Additionally splits rodata from kernel
+	  text so it can be made explicitly non-executable. This creates
+	  another section-size padded region, so it can waste more memory
+	  space while gaining the read-only protections.
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 91da64d..d3afdf9 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -6,7 +6,7 @@
 				   iomap.o
 
 obj-$(CONFIG_MMU)		+= fault-armv.o flush.o idmap.o ioremap.o \
-				   mmap.o pgd.o mmu.o
+				   mmap.o pgd.o mmu.o pageattr.o
 
 ifneq ($(CONFIG_MMU),y)
 obj-y				+= nommu.o
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 83792f4..2c0c541 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -113,7 +113,7 @@
 		new_usermode |= UM_FIXUP;
 
 		if (warn)
-			printk(KERN_WARNING "alignment: ignoring faults is unsafe on this CPU.  Defaulting to fixup mode.\n");
+			pr_warn("alignment: ignoring faults is unsafe on this CPU.  Defaulting to fixup mode.\n");
 	}
 
 	return new_usermode;
@@ -523,7 +523,7 @@
 	 * processor for us.
 	 */
 	if (addr != eaddr) {
-		printk(KERN_ERR "LDMSTM: PC = %08lx, instr = %08lx, "
+		pr_err("LDMSTM: PC = %08lx, instr = %08lx, "
 			"addr = %08lx, eaddr = %08lx\n",
 			 instruction_pointer(regs), instr, addr, eaddr);
 		show_regs(regs);
@@ -567,7 +567,7 @@
 	return TYPE_FAULT;
 
 bad:
-	printk(KERN_ERR "Alignment trap: not handling ldm with s-bit set\n");
+	pr_err("Alignment trap: not handling ldm with s-bit set\n");
 	return TYPE_ERROR;
 }
 
@@ -899,13 +899,13 @@
 	return 0;
 
  swp:
-	printk(KERN_ERR "Alignment trap: not handling swp instruction\n");
+	pr_err("Alignment trap: not handling swp instruction\n");
 
  bad:
 	/*
 	 * Oops, we didn't handle the instruction.
 	 */
-	printk(KERN_ERR "Alignment trap: not handling instruction "
+	pr_err("Alignment trap: not handling instruction "
 		"%0*lx at [<%08lx>]\n",
 		isize << 1,
 		isize == 2 ? tinstr : instr, instrptr);
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
index e028a7f..097181e 100644
--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -313,7 +313,7 @@
 	 */
 	u = read_extra_features();
 	if (!(u & 0x01000000)) {
-		printk(KERN_INFO "Feroceon L2: Disabling L2 prefetch.\n");
+		pr_info("Feroceon L2: Disabling L2 prefetch.\n");
 		write_extra_features(u | 0x01000000);
 	}
 }
@@ -326,7 +326,7 @@
 	if (!(u & 0x00400000)) {
 		int i, d;
 
-		printk(KERN_INFO "Feroceon L2: Enabling L2\n");
+		pr_info("Feroceon L2: Enabling L2\n");
 
 		d = flush_and_disable_dcache();
 		i = invalidate_and_disable_icache();
@@ -353,7 +353,7 @@
 
 	enable_l2();
 
-	printk(KERN_INFO "Feroceon L2: Cache support initialised%s.\n",
+	pr_info("Feroceon L2: Cache support initialised%s.\n",
 			 l2_wt_override ? ", in WT override mode" : "");
 }
 #ifdef CONFIG_OF
diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c
index b273739..1e373d2 100644
--- a/arch/arm/mm/cache-tauros2.c
+++ b/arch/arm/mm/cache-tauros2.c
@@ -185,7 +185,7 @@
 		u &= ~0x01000000;
 	else
 		u |= 0x01000000;
-	printk(KERN_INFO "Tauros2: %s L2 prefetch.\n",
+	pr_info("Tauros2: %s L2 prefetch.\n",
 			(features & CACHE_TAUROS2_PREFETCH_ON)
 			? "Enabling" : "Disabling");
 
@@ -193,7 +193,7 @@
 		u |= 0x00100000;
 	else
 		u &= ~0x00100000;
-	printk(KERN_INFO "Tauros2: %s line fill burt8.\n",
+	pr_info("Tauros2: %s line fill burt8.\n",
 			(features & CACHE_TAUROS2_LINEFILL_BURST8)
 			? "Enabling" : "Disabling");
 
@@ -216,7 +216,7 @@
 		 */
 		feat = read_extra_features();
 		if (!(feat & 0x00400000)) {
-			printk(KERN_INFO "Tauros2: Enabling L2 cache.\n");
+			pr_info("Tauros2: Enabling L2 cache.\n");
 			write_extra_features(feat | 0x00400000);
 		}
 
@@ -253,7 +253,7 @@
 		 */
 		actlr = read_actlr();
 		if (!(actlr & 0x00000002)) {
-			printk(KERN_INFO "Tauros2: Enabling L2 cache.\n");
+			pr_info("Tauros2: Enabling L2 cache.\n");
 			write_actlr(actlr | 0x00000002);
 		}
 
@@ -262,11 +262,11 @@
 #endif
 
 	if (mode == NULL) {
-		printk(KERN_CRIT "Tauros2: Unable to detect CPU mode.\n");
+		pr_crit("Tauros2: Unable to detect CPU mode.\n");
 		return;
 	}
 
-	printk(KERN_INFO "Tauros2: L2 cache support initialised "
+	pr_info("Tauros2: L2 cache support initialised "
 			 "in %s mode.\n", mode);
 }
 
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 6eb97b3..9189256 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -184,36 +184,46 @@
 	u64 asid = atomic64_read(&mm->context.id);
 	u64 generation = atomic64_read(&asid_generation);
 
-	if (asid != 0 && is_reserved_asid(asid)) {
+	if (asid != 0) {
 		/*
-		 * Our current ASID was active during a rollover, we can
-		 * continue to use it and this was just a false alarm.
+		 * If our current ASID was active during a rollover, we
+		 * can continue to use it and this was just a false alarm.
 		 */
-		asid = generation | (asid & ~ASID_MASK);
-	} else {
+		if (is_reserved_asid(asid))
+			return generation | (asid & ~ASID_MASK);
+
 		/*
-		 * Allocate a free ASID. If we can't find one, take a
-		 * note of the currently active ASIDs and mark the TLBs
-		 * as requiring flushes. We always count from ASID #1,
-		 * as we reserve ASID #0 to switch via TTBR0 and to
-		 * avoid speculative page table walks from hitting in
-		 * any partial walk caches, which could be populated
-		 * from overlapping level-1 descriptors used to map both
-		 * the module area and the userspace stack.
+		 * We had a valid ASID in a previous life, so try to re-use
+		 * it if possible.
 		 */
-		asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
-		if (asid == NUM_USER_ASIDS) {
-			generation = atomic64_add_return(ASID_FIRST_VERSION,
-							 &asid_generation);
-			flush_context(cpu);
-			asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
-		}
-		__set_bit(asid, asid_map);
-		cur_idx = asid;
-		asid |= generation;
-		cpumask_clear(mm_cpumask(mm));
+		asid &= ~ASID_MASK;
+		if (!__test_and_set_bit(asid, asid_map))
+			goto bump_gen;
 	}
 
+	/*
+	 * Allocate a free ASID. If we can't find one, take a note of the
+	 * currently active ASIDs and mark the TLBs as requiring flushes.
+	 * We always count from ASID #1, as we reserve ASID #0 to switch
+	 * via TTBR0 and to avoid speculative page table walks from hitting
+	 * in any partial walk caches, which could be populated from
+	 * overlapping level-1 descriptors used to map both the module
+	 * area and the userspace stack.
+	 */
+	asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
+	if (asid == NUM_USER_ASIDS) {
+		generation = atomic64_add_return(ASID_FIRST_VERSION,
+						 &asid_generation);
+		flush_context(cpu);
+		asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
+	}
+
+	__set_bit(asid, asid_map);
+	cur_idx = asid;
+
+bump_gen:
+	asid |= generation;
+	cpumask_clear(mm_cpumask(mm));
 	return asid;
 }
 
diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c
index b9bcc9d..7042334 100644
--- a/arch/arm/mm/copypage-v6.c
+++ b/arch/arm/mm/copypage-v6.c
@@ -62,7 +62,7 @@
 	__asm__("mcrr	p15, 0, %1, %0, c6	@ 0xec401f06"
 	   :
 	   : "r" (kto),
-	     "r" ((unsigned long)kto + PAGE_SIZE - L1_CACHE_BYTES)
+	     "r" ((unsigned long)kto + PAGE_SIZE - 1)
 	   : "cc");
 }
 
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index ff379ac..d9e0d00 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -235,7 +235,7 @@
 	const char *reason;
 	unsigned long v = 1;
 
-	printk(KERN_INFO "CPU: Testing write buffer coherency: ");
+	pr_info("CPU: Testing write buffer coherency: ");
 
 	page = alloc_page(GFP_KERNEL);
 	if (page) {
@@ -261,9 +261,9 @@
 	}
 
 	if (v) {
-		printk("failed, %s\n", reason);
+		pr_cont("failed, %s\n", reason);
 		shared_pte_mask = L_PTE_MT_UNCACHED;
 	} else {
-		printk("ok\n");
+		pr_cont("ok\n");
 	}
 }
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index eb8830a..a982dc3 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -63,9 +63,9 @@
 	if (!mm)
 		mm = &init_mm;
 
-	printk(KERN_ALERT "pgd = %p\n", mm->pgd);
+	pr_alert("pgd = %p\n", mm->pgd);
 	pgd = pgd_offset(mm, addr);
-	printk(KERN_ALERT "[%08lx] *pgd=%08llx",
+	pr_alert("[%08lx] *pgd=%08llx",
 			addr, (long long)pgd_val(*pgd));
 
 	do {
@@ -77,31 +77,31 @@
 			break;
 
 		if (pgd_bad(*pgd)) {
-			printk("(bad)");
+			pr_cont("(bad)");
 			break;
 		}
 
 		pud = pud_offset(pgd, addr);
 		if (PTRS_PER_PUD != 1)
-			printk(", *pud=%08llx", (long long)pud_val(*pud));
+			pr_cont(", *pud=%08llx", (long long)pud_val(*pud));
 
 		if (pud_none(*pud))
 			break;
 
 		if (pud_bad(*pud)) {
-			printk("(bad)");
+			pr_cont("(bad)");
 			break;
 		}
 
 		pmd = pmd_offset(pud, addr);
 		if (PTRS_PER_PMD != 1)
-			printk(", *pmd=%08llx", (long long)pmd_val(*pmd));
+			pr_cont(", *pmd=%08llx", (long long)pmd_val(*pmd));
 
 		if (pmd_none(*pmd))
 			break;
 
 		if (pmd_bad(*pmd)) {
-			printk("(bad)");
+			pr_cont("(bad)");
 			break;
 		}
 
@@ -110,15 +110,15 @@
 			break;
 
 		pte = pte_offset_map(pmd, addr);
-		printk(", *pte=%08llx", (long long)pte_val(*pte));
+		pr_cont(", *pte=%08llx", (long long)pte_val(*pte));
 #ifndef CONFIG_ARM_LPAE
-		printk(", *ppte=%08llx",
+		pr_cont(", *ppte=%08llx",
 		       (long long)pte_val(pte[PTE_HWTABLE_PTRS]));
 #endif
 		pte_unmap(pte);
 	} while(0);
 
-	printk("\n");
+	pr_cont("\n");
 }
 #else					/* CONFIG_MMU */
 void show_pte(struct mm_struct *mm, unsigned long addr)
@@ -142,10 +142,9 @@
 	 * No handler, we'll have to terminate things with extreme prejudice.
 	 */
 	bust_spinlocks(1);
-	printk(KERN_ALERT
-		"Unable to handle kernel %s at virtual address %08lx\n",
-		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
-		"paging request", addr);
+	pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
+		 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
+		 "paging request", addr);
 
 	show_pte(mm, addr);
 	die("Oops", regs, fsr);
@@ -551,7 +550,7 @@
 	if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
 		return;
 
-	printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n",
+	pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
 		inf->name, fsr, addr);
 
 	info.si_signo = inf->sig;
@@ -583,7 +582,7 @@
 	if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
 		return;
 
-	printk(KERN_ALERT "Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
+	pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
 		inf->name, ifsr, addr);
 
 	info.si_signo = inf->sig;
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 265b836..34b66af 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -33,7 +33,7 @@
 	asm(	"mcrr	p15, 0, %1, %0, c14\n"
 	"	mcr	p15, 0, %2, c7, c10, 4"
 	    :
-	    : "r" (to), "r" (to + PAGE_SIZE - L1_CACHE_BYTES), "r" (zero)
+	    : "r" (to), "r" (to + PAGE_SIZE - 1), "r" (zero)
 	    : "cc");
 }
 
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index e17ed00..b98895d 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -18,19 +18,20 @@
 #include <asm/tlbflush.h>
 #include "mm.h"
 
-pte_t *fixmap_page_table;
-
 static inline void set_fixmap_pte(int idx, pte_t pte)
 {
 	unsigned long vaddr = __fix_to_virt(idx);
-	set_pte_ext(fixmap_page_table + idx, pte, 0);
+	pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr);
+
+	set_pte_ext(ptep, pte, 0);
 	local_flush_tlb_kernel_page(vaddr);
 }
 
 static inline pte_t get_fixmap_pte(unsigned long vaddr)
 {
-	unsigned long idx = __virt_to_fix(vaddr);
-	return *(fixmap_page_table + idx);
+	pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr);
+
+	return *ptep;
 }
 
 void *kmap(struct page *page)
@@ -84,7 +85,7 @@
 	 * With debugging enabled, kunmap_atomic forces that entry to 0.
 	 * Make sure it was indeed properly unmapped.
 	 */
-	BUG_ON(!pte_none(*(fixmap_page_table + idx)));
+	BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
 #endif
 	/*
 	 * When debugging is off, kunmap_atomic leaves the previous mapping
@@ -137,7 +138,7 @@
 	idx = type + KM_TYPE_NR * smp_processor_id();
 	vaddr = __fix_to_virt(idx);
 #ifdef CONFIG_DEBUG_HIGHMEM
-	BUG_ON(!pte_none(*(fixmap_page_table + idx)));
+	BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
 #endif
 	set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
 
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 9481f85..98ad9c7 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -29,6 +29,7 @@
 #include <asm/prom.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
+#include <asm/system_info.h>
 #include <asm/tlb.h>
 #include <asm/fixmap.h>
 
@@ -67,7 +68,7 @@
 
 static int __init parse_tag_initrd(const struct tag *tag)
 {
-	printk(KERN_WARNING "ATAG_INITRD is deprecated; "
+	pr_warn("ATAG_INITRD is deprecated; "
 		"please update your bootloader.\n");
 	phys_initrd_start = __virt_to_phys(tag->u.initrd.start);
 	phys_initrd_size = tag->u.initrd.size;
@@ -544,7 +545,7 @@
 #define MLM(b, t) b, t, ((t) - (b)) >> 20
 #define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
 
-	printk(KERN_NOTICE "Virtual kernel memory layout:\n"
+	pr_notice("Virtual kernel memory layout:\n"
 			"    vector  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #ifdef CONFIG_HAVE_TCM
 			"    DTCM    : 0x%08lx - 0x%08lx   (%4ld kB)\n"
@@ -570,7 +571,7 @@
 			MLK(DTCM_OFFSET, (unsigned long) dtcm_end),
 			MLK(ITCM_OFFSET, (unsigned long) itcm_end),
 #endif
-			MLK(FIXADDR_START, FIXADDR_TOP),
+			MLK(FIXADDR_START, FIXADDR_END),
 			MLM(VMALLOC_START, VMALLOC_END),
 			MLM(PAGE_OFFSET, (unsigned long)high_memory),
 #ifdef CONFIG_HIGHMEM
@@ -615,7 +616,145 @@
 	}
 }
 
-void free_initmem(void)
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+struct section_perm {
+	unsigned long start;
+	unsigned long end;
+	pmdval_t mask;
+	pmdval_t prot;
+	pmdval_t clear;
+};
+
+static struct section_perm nx_perms[] = {
+	/* Make page tables, etc. before _stext RW (set NX). */
+	{
+		.start	= PAGE_OFFSET,
+		.end	= (unsigned long)_stext,
+		.mask	= ~PMD_SECT_XN,
+		.prot	= PMD_SECT_XN,
+	},
+	/* Make init RW (set NX). */
+	{
+		.start	= (unsigned long)__init_begin,
+		.end	= (unsigned long)_sdata,
+		.mask	= ~PMD_SECT_XN,
+		.prot	= PMD_SECT_XN,
+	},
+#ifdef CONFIG_DEBUG_RODATA
+	/* Make rodata NX (set RO in ro_perms below). */
+	{
+		.start  = (unsigned long)__start_rodata,
+		.end    = (unsigned long)__init_begin,
+		.mask   = ~PMD_SECT_XN,
+		.prot   = PMD_SECT_XN,
+	},
+#endif
+};
+
+#ifdef CONFIG_DEBUG_RODATA
+static struct section_perm ro_perms[] = {
+	/* Make kernel code and rodata RX (set RO). */
+	{
+		.start  = (unsigned long)_stext,
+		.end    = (unsigned long)__init_begin,
+#ifdef CONFIG_ARM_LPAE
+		.mask   = ~PMD_SECT_RDONLY,
+		.prot   = PMD_SECT_RDONLY,
+#else
+		.mask   = ~(PMD_SECT_APX | PMD_SECT_AP_WRITE),
+		.prot   = PMD_SECT_APX | PMD_SECT_AP_WRITE,
+		.clear  = PMD_SECT_AP_WRITE,
+#endif
+	},
+};
+#endif
+
+/*
+ * Updates section permissions only for the current mm (sections are
+ * copied into each mm). During startup, this is the init_mm. It is only
+ * safe to call this with preemption disabled, as under stop_machine().
+ */
+static inline void section_update(unsigned long addr, pmdval_t mask,
+				  pmdval_t prot)
+{
+	struct mm_struct *mm;
+	pmd_t *pmd;
+
+	mm = current->active_mm;
+	pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
+
+#ifdef CONFIG_ARM_LPAE
+	pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
+#else
+	if (addr & SECTION_SIZE)
+		pmd[1] = __pmd((pmd_val(pmd[1]) & mask) | prot);
+	else
+		pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
+#endif
+	flush_pmd_entry(pmd);
+	local_flush_tlb_kernel_range(addr, addr + SECTION_SIZE);
+}
+
+/* Make sure extended page tables are in use. */
+static inline bool arch_has_strict_perms(void)
+{
+	if (cpu_architecture() < CPU_ARCH_ARMv6)
+		return false;
+
+	return !!(get_cr() & CR_XP);
+}
+
+#define set_section_perms(perms, field)	{				\
+	size_t i;							\
+	unsigned long addr;						\
+									\
+	if (!arch_has_strict_perms())					\
+		return;							\
+									\
+	for (i = 0; i < ARRAY_SIZE(perms); i++) {			\
+		if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) ||	\
+		    !IS_ALIGNED(perms[i].end, SECTION_SIZE)) {		\
+			pr_err("BUG: section %lx-%lx not aligned to %lx\n", \
+				perms[i].start, perms[i].end,		\
+				SECTION_SIZE);				\
+			continue;					\
+		}							\
+									\
+		for (addr = perms[i].start;				\
+		     addr < perms[i].end;				\
+		     addr += SECTION_SIZE)				\
+			section_update(addr, perms[i].mask,		\
+				       perms[i].field);			\
+	}								\
+}
+
+static inline void fix_kernmem_perms(void)
+{
+	set_section_perms(nx_perms, prot);
+}
+
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void)
+{
+	set_section_perms(ro_perms, prot);
+}
+
+void set_kernel_text_rw(void)
+{
+	set_section_perms(ro_perms, clear);
+}
+
+void set_kernel_text_ro(void)
+{
+	set_section_perms(ro_perms, prot);
+}
+#endif /* CONFIG_DEBUG_RODATA */
+
+#else
+static inline void fix_kernmem_perms(void) { }
+#endif /* CONFIG_ARM_KERNMEM_PERMS */
+
+void free_tcmmem(void)
 {
 #ifdef CONFIG_HAVE_TCM
 	extern char __tcm_start, __tcm_end;
@@ -623,6 +762,12 @@
 	poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start);
 	free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link");
 #endif
+}
+
+void free_initmem(void)
+{
+	fix_kernmem_perms();
+	free_tcmmem();
 
 	poison_init_mem(__init_begin, __init_end - __init_begin);
 	if (!machine_is_integrator() && !machine_is_cintegrator())
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 9f98cec..cda7c40 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -22,6 +22,7 @@
 #include <asm/cputype.h>
 #include <asm/sections.h>
 #include <asm/cachetype.h>
+#include <asm/fixmap.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/smp_plat.h>
@@ -52,6 +53,8 @@
  */
 pmd_t *top_pmd;
 
+pmdval_t user_pmd_table = _PAGE_USER_TABLE;
+
 #define CPOLICY_UNCACHED	0
 #define CPOLICY_BUFFERED	1
 #define CPOLICY_WRITETHROUGH	2
@@ -192,7 +195,7 @@
 static int __init early_nocache(char *__unused)
 {
 	char *p = "buffered";
-	printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
+	pr_warn("nocache is deprecated; use cachepolicy=%s\n", p);
 	early_cachepolicy(p);
 	return 0;
 }
@@ -201,7 +204,7 @@
 static int __init early_nowrite(char *__unused)
 {
 	char *p = "uncached";
-	printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
+	pr_warn("nowb is deprecated; use cachepolicy=%s\n", p);
 	early_cachepolicy(p);
 	return 0;
 }
@@ -354,44 +357,29 @@
 }
 EXPORT_SYMBOL(get_mem_type);
 
-#define PTE_SET_FN(_name, pteop) \
-static int pte_set_##_name(pte_t *ptep, pgtable_t token, unsigned long addr, \
-			void *data) \
-{ \
-	pte_t pte = pteop(*ptep); \
-\
-	set_pte_ext(ptep, pte, 0); \
-	return 0; \
-} \
+/*
+ * To avoid TLB flush broadcasts, this uses local_flush_tlb_kernel_range().
+ * As a result, this can only be called with preemption disabled, as under
+ * stop_machine().
+ */
+void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
+{
+	unsigned long vaddr = __fix_to_virt(idx);
+	pte_t *pte = pte_offset_kernel(pmd_off_k(vaddr), vaddr);
 
-#define SET_MEMORY_FN(_name, callback) \
-int set_memory_##_name(unsigned long addr, int numpages) \
-{ \
-	unsigned long start = addr; \
-	unsigned long size = PAGE_SIZE*numpages; \
-	unsigned end = start + size; \
-\
-	if (start < MODULES_VADDR || start >= MODULES_END) \
-		return -EINVAL;\
-\
-	if (end < MODULES_VADDR || end >= MODULES_END) \
-		return -EINVAL; \
-\
-	apply_to_page_range(&init_mm, start, size, callback, NULL); \
-	flush_tlb_kernel_range(start, end); \
-	return 0;\
+	/* Make sure fixmap region does not exceed available allocation. */
+	BUILD_BUG_ON(FIXADDR_START + (__end_of_fixed_addresses * PAGE_SIZE) >
+		     FIXADDR_END);
+	BUG_ON(idx >= __end_of_fixed_addresses);
+
+	if (pgprot_val(prot))
+		set_pte_at(NULL, vaddr, pte,
+			pfn_pte(phys >> PAGE_SHIFT, prot));
+	else
+		pte_clear(NULL, vaddr, pte);
+	local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
 }
 
-PTE_SET_FN(ro, pte_wrprotect)
-PTE_SET_FN(rw, pte_mkwrite)
-PTE_SET_FN(x, pte_mkexec)
-PTE_SET_FN(nx, pte_mknexec)
-
-SET_MEMORY_FN(ro, pte_set_ro)
-SET_MEMORY_FN(rw, pte_set_rw)
-SET_MEMORY_FN(x, pte_set_x)
-SET_MEMORY_FN(nx, pte_set_nx)
-
 /*
  * Adjust the PMD section entries according to the CPU in use.
  */
@@ -528,14 +516,23 @@
 	hyp_device_pgprot = mem_types[MT_DEVICE].prot_pte;
 	s2_device_pgprot = mem_types[MT_DEVICE].prot_pte_s2;
 
+#ifndef CONFIG_ARM_LPAE
 	/*
 	 * We don't use domains on ARMv6 (since this causes problems with
 	 * v6/v7 kernels), so we must use a separate memory type for user
 	 * r/o, kernel r/w to map the vectors page.
 	 */
-#ifndef CONFIG_ARM_LPAE
 	if (cpu_arch == CPU_ARCH_ARMv6)
 		vecs_pgprot |= L_PTE_MT_VECTORS;
+
+	/*
+	 * Check whether the CPU supports the PXN bit in the
+	 * Short-descriptor translation table format descriptors.
+	 */
+	if (cpu_arch == CPU_ARCH_ARMv7 &&
+		(read_cpuid_ext(CPUID_EXT_MMFR0) & 0xF) == 4) {
+		user_pmd_table |= PMD_PXNTABLE;
+	}
 #endif
 
 	/*
@@ -605,6 +602,11 @@
 	}
 	kern_pgprot |= PTE_EXT_AF;
 	vecs_pgprot |= PTE_EXT_AF;
+
+	/*
+	 * Set PXN for user mappings
+	 */
+	user_pgprot |= PTE_EXT_PXN;
 #endif
 
 	for (i = 0; i < 16; i++) {
@@ -786,8 +788,7 @@
 	length = PAGE_ALIGN(md->length);
 
 	if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
-		printk(KERN_ERR "MM: CPU does not support supersection "
-		       "mapping for 0x%08llx at 0x%08lx\n",
+		pr_err("MM: CPU does not support supersection mapping for 0x%08llx at 0x%08lx\n",
 		       (long long)__pfn_to_phys((u64)md->pfn), addr);
 		return;
 	}
@@ -799,15 +800,13 @@
 	 *	of the actual domain assignments in use.
 	 */
 	if (type->domain) {
-		printk(KERN_ERR "MM: invalid domain in supersection "
-		       "mapping for 0x%08llx at 0x%08lx\n",
+		pr_err("MM: invalid domain in supersection mapping for 0x%08llx at 0x%08lx\n",
 		       (long long)__pfn_to_phys((u64)md->pfn), addr);
 		return;
 	}
 
 	if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
-		printk(KERN_ERR "MM: cannot create mapping for 0x%08llx"
-		       " at 0x%08lx invalid alignment\n",
+		pr_err("MM: cannot create mapping for 0x%08llx at 0x%08lx invalid alignment\n",
 		       (long long)__pfn_to_phys((u64)md->pfn), addr);
 		return;
 	}
@@ -850,18 +849,16 @@
 	pgd_t *pgd;
 
 	if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
-		printk(KERN_WARNING "BUG: not creating mapping for 0x%08llx"
-		       " at 0x%08lx in user region\n",
-		       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
+		pr_warn("BUG: not creating mapping for 0x%08llx at 0x%08lx in user region\n",
+			(long long)__pfn_to_phys((u64)md->pfn), md->virtual);
 		return;
 	}
 
 	if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
 	    md->virtual >= PAGE_OFFSET &&
 	    (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) {
-		printk(KERN_WARNING "BUG: mapping for 0x%08llx"
-		       " at 0x%08lx out of vmalloc space\n",
-		       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
+		pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n",
+			(long long)__pfn_to_phys((u64)md->pfn), md->virtual);
 	}
 
 	type = &mem_types[md->type];
@@ -881,9 +878,8 @@
 	length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
 
 	if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
-		printk(KERN_WARNING "BUG: map for 0x%08llx at 0x%08lx can not "
-		       "be mapped using pages, ignoring.\n",
-		       (long long)__pfn_to_phys(md->pfn), addr);
+		pr_warn("BUG: map for 0x%08llx at 0x%08lx can not be mapped using pages, ignoring.\n",
+			(long long)__pfn_to_phys(md->pfn), addr);
 		return;
 	}
 
@@ -1053,15 +1049,13 @@
 
 	if (vmalloc_reserve < SZ_16M) {
 		vmalloc_reserve = SZ_16M;
-		printk(KERN_WARNING
-			"vmalloc area too small, limiting to %luMB\n",
+		pr_warn("vmalloc area too small, limiting to %luMB\n",
 			vmalloc_reserve >> 20);
 	}
 
 	if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
 		vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
-		printk(KERN_WARNING
-			"vmalloc area is too big, limiting to %luMB\n",
+		pr_warn("vmalloc area is too big, limiting to %luMB\n",
 			vmalloc_reserve >> 20);
 	}
 
@@ -1094,7 +1088,7 @@
 
 			if (highmem) {
 				pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n",
-					&block_start, &block_end);
+					  &block_start, &block_end);
 				memblock_remove(reg->base, reg->size);
 				continue;
 			}
@@ -1103,7 +1097,7 @@
 				phys_addr_t overlap_size = reg->size - size_limit;
 
 				pr_notice("Truncating RAM at %pa-%pa to -%pa",
-				      &block_start, &block_end, &vmalloc_limit);
+					  &block_start, &block_end, &vmalloc_limit);
 				memblock_remove(vmalloc_limit, overlap_size);
 				block_end = vmalloc_limit;
 			}
@@ -1326,10 +1320,10 @@
 #ifdef CONFIG_HIGHMEM
 	pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
 		PKMAP_BASE, _PAGE_KERNEL_TABLE);
-
-	fixmap_page_table = early_pte_alloc(pmd_off_k(FIXADDR_START),
-		FIXADDR_START, _PAGE_KERNEL_TABLE);
 #endif
+
+	early_pte_alloc(pmd_off_k(FIXADDR_START), FIXADDR_START,
+			_PAGE_KERNEL_TABLE);
 }
 
 static void __init map_lowmem(void)
@@ -1349,13 +1343,20 @@
 		if (start >= end)
 			break;
 
-		if (end < kernel_x_start || start >= kernel_x_end) {
+		if (end < kernel_x_start) {
 			map.pfn = __phys_to_pfn(start);
 			map.virtual = __phys_to_virt(start);
 			map.length = end - start;
 			map.type = MT_MEMORY_RWX;
 
 			create_mapping(&map);
+		} else if (start >= kernel_x_end) {
+			map.pfn = __phys_to_pfn(start);
+			map.virtual = __phys_to_virt(start);
+			map.length = end - start;
+			map.type = MT_MEMORY_RW;
+
+			create_mapping(&map);
 		} else {
 			/* This better cover the entire kernel */
 			if (start < kernel_x_start) {
diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c
new file mode 100644
index 0000000..004e35c
--- /dev/null
+++ b/arch/arm/mm/pageattr.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/mm.h>
+#include <linux/module.h>
+
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+struct page_change_data {
+	pgprot_t set_mask;
+	pgprot_t clear_mask;
+};
+
+static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
+			void *data)
+{
+	struct page_change_data *cdata = data;
+	pte_t pte = *ptep;
+
+	pte = clear_pte_bit(pte, cdata->clear_mask);
+	pte = set_pte_bit(pte, cdata->set_mask);
+
+	set_pte_ext(ptep, pte, 0);
+	return 0;
+}
+
+static int change_memory_common(unsigned long addr, int numpages,
+				pgprot_t set_mask, pgprot_t clear_mask)
+{
+	unsigned long start = addr;
+	unsigned long size = PAGE_SIZE*numpages;
+	unsigned long end = start + size;
+	int ret;
+	struct page_change_data data;
+
+	if (!IS_ALIGNED(addr, PAGE_SIZE)) {
+		start &= PAGE_MASK;
+		end = start + size;
+		WARN_ON_ONCE(1);
+	}
+
+	if (!is_module_address(start) || !is_module_address(end - 1))
+		return -EINVAL;
+
+	data.set_mask = set_mask;
+	data.clear_mask = clear_mask;
+
+	ret = apply_to_page_range(&init_mm, start, size, change_page_range,
+					&data);
+
+	flush_tlb_kernel_range(start, end);
+	return ret;
+}
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+	return change_memory_common(addr, numpages,
+					__pgprot(L_PTE_RDONLY),
+					__pgprot(0));
+}
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+	return change_memory_common(addr, numpages,
+					__pgprot(0),
+					__pgprot(L_PTE_RDONLY));
+}
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+	return change_memory_common(addr, numpages,
+					__pgprot(L_PTE_XN),
+					__pgprot(0));
+}
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+	return change_memory_common(addr, numpages,
+					__pgprot(0),
+					__pgprot(L_PTE_XN));
+}
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 22ac2a6..8b4ee5e 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -591,9 +591,10 @@
 	/*
 	 * Some Krait processors don't indicate support for SDIV and UDIV
 	 * instructions in the ARM instruction set, even though they actually
-	 * do support them.
+	 * do support them. They also don't indicate support for fused multiply
+	 * instructions even though they actually do support them.
 	 */
-	__v7_proc __v7_setup, hwcaps = HWCAP_IDIV
+	__v7_proc __v7_setup, hwcaps = HWCAP_IDIV | HWCAP_VFPv4
 	.size	__krait_proc_info, . - __krait_proc_info
 
 	/*
diff --git a/arch/arm/nwfpe/fpmodule.c b/arch/arm/nwfpe/fpmodule.c
index 4e729f0..ec717c1 100644
--- a/arch/arm/nwfpe/fpmodule.c
+++ b/arch/arm/nwfpe/fpmodule.c
@@ -86,20 +86,20 @@
 static int __init fpe_init(void)
 {
 	if (sizeof(FPA11) > sizeof(union fp_state)) {
-		printk(KERN_ERR "nwfpe: bad structure size\n");
+		pr_err("nwfpe: bad structure size\n");
 		return -EINVAL;
 	}
 
 	if (sizeof(FPREG) != 12) {
-		printk(KERN_ERR "nwfpe: bad register size\n");
+		pr_err("nwfpe: bad register size\n");
 		return -EINVAL;
 	}
 	if (fpe_type[0] && strcmp(fpe_type, "nwfpe"))
 		return 0;
 
 	/* Display title, version and copyright information. */
-	printk(KERN_WARNING "NetWinder Floating Point Emulator V0.97 ("
-	       NWFPE_BITS " precision)\n");
+	pr_info("NetWinder Floating Point Emulator V0.97 ("
+	        NWFPE_BITS " precision)\n");
 
 	thread_register_notifier(&nwfpe_notifier_block);
 
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index cda654c..f74a8f7 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -197,6 +197,12 @@
 	tst	r5, #FPSCR_IXE
 	bne	process_exception
 
+	tst	r5, #FPSCR_LENGTH_MASK
+	beq	skip
+	orr	r1, r1, #FPEXC_DEX
+	b	process_exception
+skip:
+
 	@ Fall into hand on to next handler - appropriate coproc instr
 	@ not recognised by VFP
 
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 2f37e1d..f6e4d56 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -738,63 +738,73 @@
 	vfp_vector = vfp_null_entry;
 
 	pr_info("VFP support v0.3: ");
-	if (VFP_arch)
+	if (VFP_arch) {
 		pr_cont("not present\n");
-	else if (vfpsid & FPSID_NODOUBLE) {
-		pr_cont("no double precision support\n");
-	} else {
-		hotcpu_notifier(vfp_hotplug, 0);
-
-		VFP_arch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT;  /* Extract the architecture version */
-		pr_cont("implementor %02x architecture %d part %02x variant %x rev %x\n",
-			(vfpsid & FPSID_IMPLEMENTER_MASK) >> FPSID_IMPLEMENTER_BIT,
-			(vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT,
-			(vfpsid & FPSID_PART_MASK) >> FPSID_PART_BIT,
-			(vfpsid & FPSID_VARIANT_MASK) >> FPSID_VARIANT_BIT,
-			(vfpsid & FPSID_REV_MASK) >> FPSID_REV_BIT);
-
-		vfp_vector = vfp_support_entry;
-
-		thread_register_notifier(&vfp_notifier_block);
-		vfp_pm_init();
-
-		/*
-		 * We detected VFP, and the support code is
-		 * in place; report VFP support to userspace.
-		 */
-		elf_hwcap |= HWCAP_VFP;
-#ifdef CONFIG_VFPv3
-		if (VFP_arch >= 2) {
-			elf_hwcap |= HWCAP_VFPv3;
-
-			/*
-			 * Check for VFPv3 D16 and VFPv4 D16.  CPUs in
-			 * this configuration only have 16 x 64bit
-			 * registers.
-			 */
-			if (((fmrx(MVFR0) & MVFR0_A_SIMD_MASK)) == 1)
-				elf_hwcap |= HWCAP_VFPv3D16; /* also v4-D16 */
-			else
-				elf_hwcap |= HWCAP_VFPD32;
-		}
-#endif
+		return 0;
+	/* Extract the architecture on CPUID scheme */
+	} else if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) {
+		VFP_arch = vfpsid & FPSID_CPUID_ARCH_MASK;
+		VFP_arch >>= FPSID_ARCH_BIT;
 		/*
 		 * Check for the presence of the Advanced SIMD
 		 * load/store instructions, integer and single
 		 * precision floating point operations. Only check
 		 * for NEON if the hardware has the MVFR registers.
 		 */
-		if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) {
-#ifdef CONFIG_NEON
-			if ((fmrx(MVFR1) & 0x000fff00) == 0x00011100)
-				elf_hwcap |= HWCAP_NEON;
-#endif
-#ifdef CONFIG_VFPv3
+		if (IS_ENABLED(CONFIG_NEON) &&
+		   (fmrx(MVFR1) & 0x000fff00) == 0x00011100)
+			elf_hwcap |= HWCAP_NEON;
+
+		if (IS_ENABLED(CONFIG_VFPv3)) {
+			u32 mvfr0 = fmrx(MVFR0);
+			if (((mvfr0 & MVFR0_DP_MASK) >> MVFR0_DP_BIT) == 0x2 ||
+			    ((mvfr0 & MVFR0_SP_MASK) >> MVFR0_SP_BIT) == 0x2) {
+				elf_hwcap |= HWCAP_VFPv3;
+				/*
+				 * Check for VFPv3 D16 and VFPv4 D16.  CPUs in
+				 * this configuration only have 16 x 64bit
+				 * registers.
+				 */
+				if ((mvfr0 & MVFR0_A_SIMD_MASK) == 1)
+					/* also v4-D16 */
+					elf_hwcap |= HWCAP_VFPv3D16;
+				else
+					elf_hwcap |= HWCAP_VFPD32;
+			}
+
 			if ((fmrx(MVFR1) & 0xf0000000) == 0x10000000)
 				elf_hwcap |= HWCAP_VFPv4;
-#endif
 		}
+	/* Extract the architecture version on pre-cpuid scheme */
+	} else {
+		if (vfpsid & FPSID_NODOUBLE) {
+			pr_cont("no double precision support\n");
+			return 0;
+		}
+
+		VFP_arch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT;
 	}
+
+	hotcpu_notifier(vfp_hotplug, 0);
+
+	vfp_vector = vfp_support_entry;
+
+	thread_register_notifier(&vfp_notifier_block);
+	vfp_pm_init();
+
+	/*
+	 * We detected VFP, and the support code is
+	 * in place; report VFP support to userspace.
+	 */
+	elf_hwcap |= HWCAP_VFP;
+
+	pr_cont("implementor %02x architecture %d part %02x variant %x rev %x\n",
+		(vfpsid & FPSID_IMPLEMENTER_MASK) >> FPSID_IMPLEMENTER_BIT,
+		VFP_arch,
+		(vfpsid & FPSID_PART_MASK) >> FPSID_PART_BIT,
+		(vfpsid & FPSID_VARIANT_MASK) >> FPSID_VARIANT_BIT,
+		(vfpsid & FPSID_REV_MASK) >> FPSID_REV_BIT);
+
 	return 0;
 }
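
The reworked detection above only changes how the HWCAP_* bits are derived; userspace still reads them from the ELF auxiliary vector. A minimal sketch of how a program could check the capabilities this series reports (getauxval() and AT_HWCAP are the standard glibc interfaces; the exact header providing the ARM HWCAP_VFPv4/HWCAP_NEON constants is assumed here for illustration):

	#include <stdio.h>
	#include <sys/auxv.h>		/* getauxval(), AT_HWCAP */
	#include <asm/hwcap.h>		/* ARM HWCAP_* bits (assumed header) */

	int main(void)
	{
		unsigned long hwcap = getauxval(AT_HWCAP);

		/* Report the FP/SIMD features the kernel advertised. */
		printf("VFPv4: %s\n", (hwcap & HWCAP_VFPv4) ? "yes" : "no");
		printf("NEON:  %s\n", (hwcap & HWCAP_NEON)  ? "yes" : "no");
		return 0;
	}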
 
diff --git a/arch/arm/vfp/vfpsingle.c b/arch/arm/vfp/vfpsingle.c
index 4f96c16..f0465ba 100644
--- a/arch/arm/vfp/vfpsingle.c
+++ b/arch/arm/vfp/vfpsingle.c
@@ -290,7 +290,7 @@
 	u32 z, a;
 
 	if ((significand & 0xc0000000) != 0x40000000) {
-		printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n");
+		pr_warn("VFP: estimate_sqrt: invalid significand\n");
 	}
 
 	a = significand << 1;
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 6389d60..a5abb00 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -32,6 +32,9 @@
 #define rmb()		dsb(ld)
 #define wmb()		dsb(st)
 
+#define dma_rmb()	dmb(oshld)
+#define dma_wmb()	dmb(oshst)
+
 #ifndef CONFIG_SMP
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
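
The dma_rmb()/dma_wmb() macros added across the architectures in this series order CPU accesses to coherent DMA memory without paying for a full rmb()/wmb(); on arm64 they map to dmb(oshld)/dmb(oshst). A hedged sketch of the intended usage in a driver; the descriptor layout, the DESC_DONE flag and the process_buffer() helper are made up for illustration:

	/* Hypothetical descriptor shared with a DMA-capable device. */
	struct rx_desc {
		u32 status;	/* device sets DESC_DONE when the buffer is filled */
		u32 len;
		u64 addr;
	};

	static int poll_one_rx(struct rx_desc *desc)
	{
		if (!(ACCESS_ONCE(desc->status) & DESC_DONE))
			return -EAGAIN;

		/*
		 * Order the status check against the reads of the rest of the
		 * descriptor.  dma_rmb() suffices because both accesses are to
		 * coherent memory; no MMIO read is involved.
		 */
		dma_rmb();

		/* process_buffer() is an illustrative per-driver helper. */
		return process_buffer(desc->addr, desc->len);
	}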
diff --git a/arch/blackfin/include/asm/barrier.h b/arch/blackfin/include/asm/barrier.h
index 4200068..dfb66fe 100644
--- a/arch/blackfin/include/asm/barrier.h
+++ b/arch/blackfin/include/asm/barrier.h
@@ -22,6 +22,57 @@
 # define mb()	do { barrier(); smp_check_barrier(); smp_mark_barrier(); } while (0)
 # define rmb()	do { barrier(); smp_check_barrier(); } while (0)
 # define wmb()	do { barrier(); smp_mark_barrier(); } while (0)
+/*
+ * read_barrier_depends - Flush all pending reads that subsequent reads
+ * depend on.
+ *
+ * No data-dependent reads from memory-like regions are ever reordered
+ * over this barrier.  All reads preceding this primitive are guaranteed
+ * to access memory (but not necessarily other CPUs' caches) before any
+ * reads following this primitive that depend on the data returned by
+ * any of the preceding reads.  This primitive is much lighter weight than
+ * rmb() on most CPUs, and is never heavier weight than is
+ * rmb().
+ *
+ * These ordering constraints are respected by both the local CPU
+ * and the compiler.
+ *
+ * Ordering is not guaranteed by anything other than these primitives,
+ * not even by data dependencies.  See the documentation for
+ * memory_barrier() for examples and URLs to more information.
+ *
+ * For example, the following code would force ordering (the initial
+ * value of "a" is zero, "b" is one, and "p" is "&a"):
+ *
+ * <programlisting>
+ *	CPU 0				CPU 1
+ *
+ *	b = 2;
+ *	memory_barrier();
+ *	p = &b;				q = p;
+ *					read_barrier_depends();
+ *					d = *q;
+ * </programlisting>
+ *
+ * because the read of "*q" depends on the read of "p" and these
+ * two reads are separated by a read_barrier_depends().  However,
+ * the following code, with the same initial values for "a" and "b":
+ *
+ * <programlisting>
+ *	CPU 0				CPU 1
+ *
+ *	a = 2;
+ *	memory_barrier();
+ *	b = 3;				y = b;
+ *					read_barrier_depends();
+ *					x = a;
+ * </programlisting>
+ *
+ * does not enforce ordering, since there is no data dependency between
+ * the read of "a" and the read of "b".  Therefore, on some CPUs, such
+ * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
+ * in cases like this where there are no data dependencies.
+ */
 # define read_barrier_depends()	do { barrier(); smp_check_barrier(); } while (0)
 #endif
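
For reference, the publish/consume pattern the comment above walks through, written with the kernel primitives (structure and variable names are illustrative only):

	struct foo {
		int val;
	};
	static struct foo *gp;	/* globally visible pointer, initially NULL */

	/* Publisher: fully initialise the object before making it visible. */
	void publish(struct foo *p)
	{
		p->val = 42;
		smp_wmb();		/* order the init before the publication */
		ACCESS_ONCE(gp) = p;
	}

	/* Consumer: the loads of gp and q->val are data dependent. */
	int consume(void)
	{
		struct foo *q = ACCESS_ONCE(gp);

		if (!q)
			return -1;
		smp_read_barrier_depends();	/* a no-op everywhere but Alpha */
		return q->val;
	}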
 
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index a48957c..f6769eb 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -35,26 +35,25 @@
  * it's (presumably) much slower than mf and (b) mf.a is supported for
  * sequential memory pages only.
  */
-#define mb()	ia64_mf()
-#define rmb()	mb()
-#define wmb()	mb()
-#define read_barrier_depends()	do { } while(0)
+#define mb()		ia64_mf()
+#define rmb()		mb()
+#define wmb()		mb()
+
+#define dma_rmb()	mb()
+#define dma_wmb()	mb()
 
 #ifdef CONFIG_SMP
 # define smp_mb()	mb()
-# define smp_rmb()	rmb()
-# define smp_wmb()	wmb()
-# define smp_read_barrier_depends()	read_barrier_depends()
-
 #else
-
 # define smp_mb()	barrier()
-# define smp_rmb()	barrier()
-# define smp_wmb()	barrier()
-# define smp_read_barrier_depends()	do { } while(0)
-
 #endif
 
+#define smp_rmb()	smp_mb()
+#define smp_wmb()	smp_mb()
+
+#define read_barrier_depends()		do { } while (0)
+#define smp_read_barrier_depends()	do { } while (0)
+
 #define smp_mb__before_atomic()	barrier()
 #define smp_mb__after_atomic()	barrier()
 
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index c7591e8..d703d8e 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -4,8 +4,6 @@
 #include <asm/metag_mem.h>
 
 #define nop()		asm volatile ("NOP")
-#define mb()		wmb()
-#define rmb()		barrier()
 
 #ifdef CONFIG_METAG_META21
 
@@ -41,13 +39,13 @@
 
 #endif /* !CONFIG_METAG_META21 */
 
-static inline void wmb(void)
-{
-	/* flush writes through the write combiner */
-	wr_fence();
-}
+/* flush writes through the write combiner */
+#define mb()		wr_fence()
+#define rmb()		barrier()
+#define wmb()		mb()
 
-#define read_barrier_depends()  do { } while (0)
+#define dma_rmb()	rmb()
+#define dma_wmb()	wmb()
 
 #ifndef CONFIG_SMP
 #define fence()		do { } while (0)
@@ -82,7 +80,10 @@
 #define smp_wmb()       barrier()
 #endif
 #endif
-#define smp_read_barrier_depends()     do { } while (0)
+
+#define read_barrier_depends()		do { } while (0)
+#define smp_read_barrier_depends()	do { } while (0)
+
 #define set_mb(var, value) do { var = value; smp_mb(); } while (0)
 
 #define smp_store_release(p, v)						\
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index d0101dd..2b8bbbc 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -10,58 +10,6 @@
 
 #include <asm/addrspace.h>
 
-/*
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier.  All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads.  This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies.  See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- *	CPU 0				CPU 1
- *
- *	b = 2;
- *	memory_barrier();
- *	p = &b;				q = p;
- *					read_barrier_depends();
- *					d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends().  However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- *	CPU 0				CPU 1
- *
- *	a = 2;
- *	memory_barrier();
- *	b = 3;				y = b;
- *					read_barrier_depends();
- *					x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b".  Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
- * in cases like this where there are no data dependencies.
- */
-
 #define read_barrier_depends()		do { } while(0)
 #define smp_read_barrier_depends()	do { } while(0)
 
@@ -127,20 +75,21 @@
 
 #include <asm/wbflush.h>
 
-#define wmb()		fast_wmb()
-#define rmb()		fast_rmb()
 #define mb()		wbflush()
 #define iob()		wbflush()
 
 #else /* !CONFIG_CPU_HAS_WB */
 
-#define wmb()		fast_wmb()
-#define rmb()		fast_rmb()
 #define mb()		fast_mb()
 #define iob()		fast_iob()
 
 #endif /* !CONFIG_CPU_HAS_WB */
 
+#define wmb()		fast_wmb()
+#define rmb()		fast_rmb()
+#define dma_wmb()	fast_wmb()
+#define dma_rmb()	fast_rmb()
+
 #if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP)
 # ifdef CONFIG_CPU_CAVIUM_OCTEON
 #  define smp_mb()	__sync()
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index a8c20af..0589290 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -493,7 +493,7 @@
 
 static int __init early_parse_mem(char *p)
 {
-	phys_t start, size;
+	phys_addr_t start, size;
 
 	/*
 	 * If a user specifies memory size, we
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index bab79a1..a3bf5be 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -33,12 +33,9 @@
 #define mb()   __asm__ __volatile__ ("sync" : : : "memory")
 #define rmb()  __asm__ __volatile__ ("sync" : : : "memory")
 #define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
-#define read_barrier_depends()  do { } while(0)
 
 #define set_mb(var, value)	do { var = value; mb(); } while (0)
 
-#ifdef CONFIG_SMP
-
 #ifdef __SUBARCH_HAS_LWSYNC
 #    define SMPWMB      LWSYNC
 #else
@@ -46,20 +43,26 @@
 #endif
 
 #define __lwsync()	__asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+#define dma_rmb()	__lwsync()
+#define dma_wmb()	__asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
+
+#ifdef CONFIG_SMP
+#define smp_lwsync()	__lwsync()
 
 #define smp_mb()	mb()
 #define smp_rmb()	__lwsync()
 #define smp_wmb()	__asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
-#define smp_read_barrier_depends()	read_barrier_depends()
 #else
-#define __lwsync()	barrier()
+#define smp_lwsync()	barrier()
 
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	do { } while(0)
 #endif /* CONFIG_SMP */
 
+#define read_barrier_depends()		do { } while (0)
+#define smp_read_barrier_depends()	do { } while (0)
+
 /*
  * This is a barrier which prevents following instructions from being
  * started until the value of the argument x is known.  For example, if
@@ -72,7 +75,7 @@
 #define smp_store_release(p, v)						\
 do {									\
 	compiletime_assert_atomic_type(*p);				\
-	__lwsync();							\
+	smp_lwsync();							\
 	ACCESS_ONCE(*p) = (v);						\
 } while (0)
 
@@ -80,7 +83,7 @@
 ({									\
 	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
-	__lwsync();							\
+	smp_lwsync();							\
 	___p1;								\
 })
 
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index b5dce65..8d72471 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -24,11 +24,14 @@
 
 #define rmb()				mb()
 #define wmb()				mb()
-#define read_barrier_depends()		do { } while(0)
+#define dma_rmb()			rmb()
+#define dma_wmb()			wmb()
 #define smp_mb()			mb()
 #define smp_rmb()			rmb()
 #define smp_wmb()			wmb()
-#define smp_read_barrier_depends()	read_barrier_depends()
+
+#define read_barrier_depends()		do { } while (0)
+#define smp_read_barrier_depends()	do { } while (0)
 
 #define smp_mb__before_atomic()		smp_mb()
 #define smp_mb__after_atomic()		smp_mb()
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 305dcc3..7664894 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -37,7 +37,9 @@
 #define rmb()	__asm__ __volatile__("":::"memory")
 #define wmb()	__asm__ __volatile__("":::"memory")
 
-#define read_barrier_depends()		do { } while(0)
+#define dma_rmb()	rmb()
+#define dma_wmb()	wmb()
+
 #define set_mb(__var, __value) \
 	do { __var = __value; membar_safe("#StoreLoad"); } while(0)
 
@@ -51,7 +53,8 @@
 #define smp_wmb()	__asm__ __volatile__("":::"memory")
 #endif
 
-#define smp_read_barrier_depends()	do { } while(0)
+#define read_barrier_depends()		do { } while (0)
+#define smp_read_barrier_depends()	do { } while (0)
 
 #define smp_store_release(p, v)						\
 do {									\
diff --git a/arch/sparc/include/asm/ldc.h b/arch/sparc/include/asm/ldc.h
index 58ab64d..6e9004a 100644
--- a/arch/sparc/include/asm/ldc.h
+++ b/arch/sparc/include/asm/ldc.h
@@ -61,6 +61,7 @@
 
 /* Register TX and RX queues of the link with the hypervisor.  */
 int ldc_bind(struct ldc_channel *lp);
+void ldc_unbind(struct ldc_channel *lp);
 
 /* For non-RAW protocols we need to complete a handshake before
  * communication can proceed.  ldc_connect() does that, if the
diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h
index fb124fe..8174f6c 100644
--- a/arch/sparc/include/asm/vio.h
+++ b/arch/sparc/include/asm/vio.h
@@ -300,6 +300,21 @@
 		((dr->prod - dr->cons) & (ring_size - 1)) - 1);
 }
 
+static inline u32 vio_dring_next(struct vio_dring_state *dr, u32 index)
+{
+	if (++index == dr->num_entries)
+		index = 0;
+	return index;
+}
+
+static inline u32 vio_dring_prev(struct vio_dring_state *dr, u32 index)
+{
+	if (index == 0)
+		return dr->num_entries - 1;
+	else
+		return index - 1;
+}
+
 #define VIO_MAX_TYPE_LEN	32
 #define VIO_MAX_COMPAT_LEN	64
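
vio_dring_next()/vio_dring_prev() replace the open-coded "(idx + 1) & (ring_size - 1)" arithmetic in callers such as sunvdc below, and unlike the mask they remain correct when num_entries is not a power of two. A sketch of walking the in-flight span of a ring with the new helper, mirroring what vdc_requeue_inflight() does later in this patch (handle_descriptor() is an assumed per-driver helper):

	u32 idx;

	/* Visit every descriptor that has been produced but not yet consumed. */
	for (idx = dr->cons; idx != dr->prod; idx = vio_dring_next(dr, idx)) {
		struct vio_disk_desc *desc = vio_dring_entry(dr, idx);

		handle_descriptor(desc);
	}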
 
diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c
index 4310332..274a9f5 100644
--- a/arch/sparc/kernel/ldc.c
+++ b/arch/sparc/kernel/ldc.c
@@ -1222,11 +1222,12 @@
 }
 EXPORT_SYMBOL(ldc_alloc);
 
-void ldc_free(struct ldc_channel *lp)
+void ldc_unbind(struct ldc_channel *lp)
 {
 	if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
 		free_irq(lp->cfg.rx_irq, lp);
 		free_irq(lp->cfg.tx_irq, lp);
+		lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
 	}
 
 	if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
@@ -1240,10 +1241,15 @@
 		lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
 	}
 
+	ldc_set_state(lp, LDC_STATE_INIT);
+}
+EXPORT_SYMBOL(ldc_unbind);
+
+void ldc_free(struct ldc_channel *lp)
+{
+	ldc_unbind(lp);
 	hlist_del(&lp->list);
-
 	kfree(lp->mssbuf);
-
 	ldc_iommu_release(lp);
 
 	kfree(lp);
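
ldc_unbind() is split out of ldc_free() so a driver can explicitly quiesce a channel (release its IRQs and queues and return it to LDC_STATE_INIT) as part of a link-reset path. A condensed sketch of how sunvdc below strings these calls together (vdc_port_down() followed by vdc_ldc_reset()); error handling is trimmed:

	/* Tear the link down: disconnect, unbind, then free the channel. */
	ldc_disconnect(port->vio.lp);
	ldc_unbind(port->vio.lp);
	vio_ldc_free(&port->vio);

	/* Recover: allocate a fresh channel; the VIO timer re-runs the handshake. */
	err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
	if (!err)
		mod_timer(&port->vio.timer, round_jiffies(jiffies + HZ));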
diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c
index ea2bad3..71e16f2 100644
--- a/arch/sparc/kernel/leon_smp.c
+++ b/arch/sparc/kernel/leon_smp.c
@@ -368,7 +368,7 @@
 	unsigned long arg5;
 	unsigned long processors_in[NR_CPUS];	/* Set when ipi entered. */
 	unsigned long processors_out[NR_CPUS];	/* Set when ipi exited. */
-} ccall_info;
+} ccall_info __attribute__((aligned(8)));
 
 static DEFINE_SPINLOCK(cross_call_lock);
 
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 0f4460b..2ab1eb3 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -24,78 +24,28 @@
 #define wmb()	asm volatile("sfence" ::: "memory")
 #endif
 
-/**
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier.  All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads.  This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies.  See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- *	CPU 0				CPU 1
- *
- *	b = 2;
- *	memory_barrier();
- *	p = &b;				q = p;
- *					read_barrier_depends();
- *					d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends().  However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- *	CPU 0				CPU 1
- *
- *	a = 2;
- *	memory_barrier();
- *	b = 3;				y = b;
- *					read_barrier_depends();
- *					x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b".  Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
- * in cases like this where there are no data dependencies.
- **/
-
-#define read_barrier_depends()	do { } while (0)
+#ifdef CONFIG_X86_PPRO_FENCE
+#define dma_rmb()	rmb()
+#else
+#define dma_rmb()	barrier()
+#endif
+#define dma_wmb()	barrier()
 
 #ifdef CONFIG_SMP
 #define smp_mb()	mb()
-#ifdef CONFIG_X86_PPRO_FENCE
-# define smp_rmb()	rmb()
-#else
-# define smp_rmb()	barrier()
-#endif
+#define smp_rmb()	dma_rmb()
 #define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	read_barrier_depends()
 #define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
 #else /* !SMP */
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	do { } while (0)
 #define set_mb(var, value) do { var = value; barrier(); } while (0)
 #endif /* SMP */
 
+#define read_barrier_depends()		do { } while (0)
+#define smp_read_barrier_depends()	do { } while (0)
+
 #if defined(CONFIG_X86_PPRO_FENCE)
 
 /*
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index cc04e67..2d7d9a1 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -29,20 +29,18 @@
 
 #endif /* CONFIG_X86_32 */
 
-#define read_barrier_depends()	do { } while (0)
+#ifdef CONFIG_X86_PPRO_FENCE
+#define dma_rmb()	rmb()
+#else /* CONFIG_X86_PPRO_FENCE */
+#define dma_rmb()	barrier()
+#endif /* CONFIG_X86_PPRO_FENCE */
+#define dma_wmb()	barrier()
 
 #ifdef CONFIG_SMP
 
 #define smp_mb()	mb()
-#ifdef CONFIG_X86_PPRO_FENCE
-#define smp_rmb()	rmb()
-#else /* CONFIG_X86_PPRO_FENCE */
-#define smp_rmb()	barrier()
-#endif /* CONFIG_X86_PPRO_FENCE */
-
+#define smp_rmb()	dma_rmb()
 #define smp_wmb()	barrier()
-
-#define smp_read_barrier_depends()	read_barrier_depends()
 #define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
 
 #else /* CONFIG_SMP */
@@ -50,11 +48,13 @@
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
-#define smp_read_barrier_depends()	do { } while (0)
 #define set_mb(var, value) do { var = value; barrier(); } while (0)
 
 #endif /* CONFIG_SMP */
 
+#define read_barrier_depends()		do { } while (0)
+#define smp_read_barrier_depends()	do { } while (0)
+
 /*
  * Stop RDTSC speculation. This is needed when you need to use RDTSC
  * (or get_cycles or vread that possibly accesses the TSC) in a defined
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index cd4de7e..c6bcb8c 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -33,6 +33,7 @@
 #include <linux/pci.h>
 #include <linux/pci-acpi.h>
 #include <linux/pci-aspm.h>
+#include <linux/dmar.h>
 #include <linux/acpi.h>
 #include <linux/slab.h>
 #include <linux/dmi.h>
@@ -525,6 +526,7 @@
 	struct acpi_pci_root *root;
 	acpi_handle handle = device->handle;
 	int no_aspm = 0, clear_aspm = 0;
+	bool hotadd = system_state != SYSTEM_BOOTING;
 
 	root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL);
 	if (!root)
@@ -571,6 +573,11 @@
 	strcpy(acpi_device_class(device), ACPI_PCI_ROOT_CLASS);
 	device->driver_data = root;
 
+	if (hotadd && dmar_device_add(handle)) {
+		result = -ENXIO;
+		goto end;
+	}
+
 	pr_info(PREFIX "%s [%s] (domain %04x %pR)\n",
 	       acpi_device_name(device), acpi_device_bid(device),
 	       root->segment, &root->secondary);
@@ -597,7 +604,7 @@
 			root->segment, (unsigned int)root->secondary.start);
 		device->driver_data = NULL;
 		result = -ENODEV;
-		goto end;
+		goto remove_dmar;
 	}
 
 	if (clear_aspm) {
@@ -611,7 +618,7 @@
 	if (device->wakeup.flags.run_wake)
 		device_set_run_wake(root->bus->bridge, true);
 
-	if (system_state != SYSTEM_BOOTING) {
+	if (hotadd) {
 		pcibios_resource_survey_bus(root->bus);
 		pci_assign_unassigned_root_bus_resources(root->bus);
 	}
@@ -621,6 +628,9 @@
 	pci_unlock_rescan_remove();
 	return 1;
 
+remove_dmar:
+	if (hotadd)
+		dmar_device_remove(handle);
 end:
 	kfree(root);
 	return result;
@@ -639,6 +649,8 @@
 
 	pci_remove_root_bus(root->bus);
 
+	dmar_device_remove(device->handle);
+
 	pci_unlock_rescan_remove();
 
 	kfree(root);
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 973a333..80f4de7 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -95,8 +95,12 @@
 	struct amba_device *pcdev = to_amba_device(dev);
 	int ret = pm_generic_runtime_suspend(dev);
 
-	if (ret == 0 && dev->driver)
-		clk_disable_unprepare(pcdev->pclk);
+	if (ret == 0 && dev->driver) {
+		if (pm_runtime_is_irq_safe(dev))
+			clk_disable(pcdev->pclk);
+		else
+			clk_disable_unprepare(pcdev->pclk);
+	}
 
 	return ret;
 }
@@ -107,7 +111,10 @@
 	int ret;
 
 	if (dev->driver) {
-		ret = clk_prepare_enable(pcdev->pclk);
+		if (pm_runtime_is_irq_safe(dev))
+			ret = clk_enable(pcdev->pclk);
+		else
+			ret = clk_prepare_enable(pcdev->pclk);
 		/* Failure is probably fatal to the system, but... */
 		if (ret)
 			return ret;
@@ -115,7 +122,7 @@
 
 	return pm_generic_runtime_resume(dev);
 }
-#endif
+#endif /* CONFIG_PM */
 
 static const struct dev_pm_ops amba_pm = {
 	.suspend	= pm_generic_suspend,
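
The amba bus now uses the non-sleeping clk_enable()/clk_disable() pair when a device's runtime PM is marked IRQ-safe, since those callbacks may then run in atomic context where clk_prepare()/clk_unprepare() are not allowed. A hedged sketch of how a driver on this bus would opt in (the driver and function names are hypothetical):

	static int foo_probe(struct amba_device *adev, const struct amba_id *id)
	{
		/* ... usual resource and clock setup ... */

		/*
		 * Declare the runtime PM callbacks IRQ-safe; the bus code above
		 * will then stick to clk_enable()/clk_disable(), which may be
		 * called with interrupts disabled.
		 */
		pm_runtime_irq_safe(&adev->dev);

		return 0;
	}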
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 0ebadf9..4b911ed 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -23,8 +23,8 @@
 
 #define DRV_MODULE_NAME		"sunvdc"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"1.1"
-#define DRV_MODULE_RELDATE	"February 13, 2013"
+#define DRV_MODULE_VERSION	"1.2"
+#define DRV_MODULE_RELDATE	"November 24, 2014"
 
 static char version[] =
 	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
@@ -40,6 +40,8 @@
 #define WAITING_FOR_GEN_CMD	0x04
 #define WAITING_FOR_ANY		-1
 
+static struct workqueue_struct *sunvdc_wq;
+
 struct vdc_req_entry {
 	struct request		*req;
 };
@@ -60,6 +62,10 @@
 	u64			max_xfer_size;
 	u32			vdisk_block_size;
 
+	u64			ldc_timeout;
+	struct timer_list	ldc_reset_timer;
+	struct work_struct	ldc_reset_work;
+
 	/* The server fills these in for us in the disk attribute
 	 * ACK packet.
 	 */
@@ -71,6 +77,10 @@
 	char			disk_name[32];
 };
 
+static void vdc_ldc_reset(struct vdc_port *port);
+static void vdc_ldc_reset_work(struct work_struct *work);
+static void vdc_ldc_reset_timer(unsigned long _arg);
+
 static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
 {
 	return container_of(vio, struct vdc_port, vio);
@@ -150,6 +160,21 @@
 	.ioctl		= vdc_ioctl,
 };
 
+static void vdc_blk_queue_start(struct vdc_port *port)
+{
+	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+
+	/* Restart the blk queue when the ring is half emptied.  This is also
+	 * called when a handshake completes; during the initial handshake the
+	 * disk has not been allocated yet, so check for that first.
+	 */
+	if (port->disk && blk_queue_stopped(port->disk->queue) &&
+	    vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50) {
+		blk_start_queue(port->disk->queue);
+	}
+
+}
+
 static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for)
 {
 	if (vio->cmp &&
@@ -163,7 +188,11 @@
 
 static void vdc_handshake_complete(struct vio_driver_state *vio)
 {
+	struct vdc_port *port = to_vdc_port(vio);
+
+	del_timer(&port->ldc_reset_timer);
 	vdc_finish(vio, 0, WAITING_FOR_LINK_UP);
+	vdc_blk_queue_start(port);
 }
 
 static int vdc_handle_unknown(struct vdc_port *port, void *arg)
@@ -269,7 +298,7 @@
 
 	ldc_unmap(port->vio.lp, desc->cookies, desc->ncookies);
 	desc->hdr.state = VIO_DESC_FREE;
-	dr->cons = (index + 1) & (VDC_TX_RING_SIZE - 1);
+	dr->cons = vio_dring_next(dr, index);
 
 	req = rqe->req;
 	if (req == NULL) {
@@ -281,10 +310,7 @@
 
 	__blk_end_request(req, (desc->status ? -EIO : 0), desc->size);
 
-	/* restart blk queue when ring is half emptied */
-	if (blk_queue_stopped(port->disk->queue) &&
-	    vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50)
-		blk_start_queue(port->disk->queue);
+	vdc_blk_queue_start(port);
 }
 
 static int vdc_ack(struct vdc_port *port, void *msgbuf)
@@ -317,17 +343,20 @@
 
 	spin_lock_irqsave(&vio->lock, flags);
 
-	if (unlikely(event == LDC_EVENT_RESET ||
-		     event == LDC_EVENT_UP)) {
+	if (unlikely(event == LDC_EVENT_RESET)) {
 		vio_link_state_change(vio, event);
-		spin_unlock_irqrestore(&vio->lock, flags);
-		return;
+		queue_work(sunvdc_wq, &port->ldc_reset_work);
+		goto out;
+	}
+
+	if (unlikely(event == LDC_EVENT_UP)) {
+		vio_link_state_change(vio, event);
+		goto out;
 	}
 
 	if (unlikely(event != LDC_EVENT_DATA_READY)) {
-		printk(KERN_WARNING PFX "Unexpected LDC event %d\n", event);
-		spin_unlock_irqrestore(&vio->lock, flags);
-		return;
+		pr_warn(PFX "Unexpected LDC event %d\n", event);
+		goto out;
 	}
 
 	err = 0;
@@ -371,6 +400,7 @@
 	}
 	if (err < 0)
 		vdc_finish(&port->vio, err, WAITING_FOR_ANY);
+out:
 	spin_unlock_irqrestore(&vio->lock, flags);
 }
 
@@ -403,6 +433,8 @@
 			delay = 128;
 	} while (err == -EAGAIN);
 
+	if (err == -ENOTCONN)
+		vdc_ldc_reset(port);
 	return err;
 }
 
@@ -472,7 +504,7 @@
 		printk(KERN_ERR PFX "vdc_tx_trigger() failure, err=%d\n", err);
 	} else {
 		port->req_id++;
-		dr->prod = (dr->prod + 1) & (VDC_TX_RING_SIZE - 1);
+		dr->prod = vio_dring_next(dr, dr->prod);
 	}
 
 	return err;
@@ -626,7 +658,7 @@
 	err = __vdc_tx_trigger(port);
 	if (err >= 0) {
 		port->req_id++;
-		dr->prod = (dr->prod + 1) & (VDC_TX_RING_SIZE - 1);
+		dr->prod = vio_dring_next(dr, dr->prod);
 		spin_unlock_irqrestore(&port->vio.lock, flags);
 
 		wait_for_completion(&comp.com);
@@ -690,12 +722,9 @@
 	}
 }
 
-static int probe_disk(struct vdc_port *port)
+static int vdc_port_up(struct vdc_port *port)
 {
 	struct vio_completion comp;
-	struct request_queue *q;
-	struct gendisk *g;
-	int err;
 
 	init_completion(&comp.com);
 	comp.err = 0;
@@ -703,10 +732,27 @@
 	port->vio.cmp = &comp;
 
 	vio_port_up(&port->vio);
-
 	wait_for_completion(&comp.com);
-	if (comp.err)
-		return comp.err;
+	return comp.err;
+}
+
+static void vdc_port_down(struct vdc_port *port)
+{
+	ldc_disconnect(port->vio.lp);
+	ldc_unbind(port->vio.lp);
+	vdc_free_tx_ring(port);
+	vio_ldc_free(&port->vio);
+}
+
+static int probe_disk(struct vdc_port *port)
+{
+	struct request_queue *q;
+	struct gendisk *g;
+	int err;
+
+	err = vdc_port_up(port);
+	if (err)
+		return err;
 
 	if (vdc_version_supported(port, 1, 1)) {
 		/* vdisk_size should be set during the handshake, if it wasn't
@@ -819,6 +865,7 @@
 	struct mdesc_handle *hp;
 	struct vdc_port *port;
 	int err;
+	const u64 *ldc_timeout;
 
 	print_version();
 
@@ -848,6 +895,16 @@
 			 VDCBLK_NAME "%c", 'a' + ((int)vdev->dev_no % 26));
 	port->vdisk_size = -1;
 
+	/* The actual wall time may be double the configured timeout because
+	 * do_generic_file_read() issues a readahead I/O first; if that fails,
+	 * it then retries with a single-page read.
+	 */
+	ldc_timeout = mdesc_get_property(hp, vdev->mp, "vdc-timeout", NULL);
+	port->ldc_timeout = ldc_timeout ? *ldc_timeout : 0;
+	setup_timer(&port->ldc_reset_timer, vdc_ldc_reset_timer,
+		    (unsigned long)port);
+	INIT_WORK(&port->ldc_reset_work, vdc_ldc_reset_work);
+
 	err = vio_driver_init(&port->vio, vdev, VDEV_DISK,
 			      vdc_versions, ARRAY_SIZE(vdc_versions),
 			      &vdc_vio_ops, port->disk_name);
@@ -896,8 +953,21 @@
 	struct vdc_port *port = dev_get_drvdata(&vdev->dev);
 
 	if (port) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&port->vio.lock, flags);
+		blk_stop_queue(port->disk->queue);
+		spin_unlock_irqrestore(&port->vio.lock, flags);
+
+		flush_work(&port->ldc_reset_work);
+		del_timer_sync(&port->ldc_reset_timer);
 		del_timer_sync(&port->vio.timer);
 
+		del_gendisk(port->disk);
+		blk_cleanup_queue(port->disk->queue);
+		put_disk(port->disk);
+		port->disk = NULL;
+
 		vdc_free_tx_ring(port);
 		vio_ldc_free(&port->vio);
 
@@ -908,6 +978,102 @@
 	return 0;
 }
 
+static void vdc_requeue_inflight(struct vdc_port *port)
+{
+	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+	u32 idx;
+
+	for (idx = dr->cons; idx != dr->prod; idx = vio_dring_next(dr, idx)) {
+		struct vio_disk_desc *desc = vio_dring_entry(dr, idx);
+		struct vdc_req_entry *rqe = &port->rq_arr[idx];
+		struct request *req;
+
+		ldc_unmap(port->vio.lp, desc->cookies, desc->ncookies);
+		desc->hdr.state = VIO_DESC_FREE;
+		dr->cons = vio_dring_next(dr, idx);
+
+		req = rqe->req;
+		if (req == NULL) {
+			vdc_end_special(port, desc);
+			continue;
+		}
+
+		rqe->req = NULL;
+		blk_requeue_request(port->disk->queue, req);
+	}
+}
+
+static void vdc_queue_drain(struct vdc_port *port)
+{
+	struct request *req;
+
+	while ((req = blk_fetch_request(port->disk->queue)) != NULL)
+		__blk_end_request_all(req, -EIO);
+}
+
+static void vdc_ldc_reset_timer(unsigned long _arg)
+{
+	struct vdc_port *port = (struct vdc_port *) _arg;
+	struct vio_driver_state *vio = &port->vio;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vio->lock, flags);
+	if (!(port->vio.hs_state & VIO_HS_COMPLETE)) {
+		pr_warn(PFX "%s ldc down %llu seconds, draining queue\n",
+			port->disk_name, port->ldc_timeout);
+		vdc_queue_drain(port);
+		vdc_blk_queue_start(port);
+	}
+	spin_unlock_irqrestore(&vio->lock, flags);
+}
+
+static void vdc_ldc_reset_work(struct work_struct *work)
+{
+	struct vdc_port *port;
+	struct vio_driver_state *vio;
+	unsigned long flags;
+
+	port = container_of(work, struct vdc_port, ldc_reset_work);
+	vio = &port->vio;
+
+	spin_lock_irqsave(&vio->lock, flags);
+	vdc_ldc_reset(port);
+	spin_unlock_irqrestore(&vio->lock, flags);
+}
+
+static void vdc_ldc_reset(struct vdc_port *port)
+{
+	int err;
+
+	assert_spin_locked(&port->vio.lock);
+
+	pr_warn(PFX "%s ldc link reset\n", port->disk_name);
+	blk_stop_queue(port->disk->queue);
+	vdc_requeue_inflight(port);
+	vdc_port_down(port);
+
+	err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
+	if (err) {
+		pr_err(PFX "%s vio_ldc_alloc:%d\n", port->disk_name, err);
+		return;
+	}
+
+	err = vdc_alloc_tx_ring(port);
+	if (err) {
+		pr_err(PFX "%s vio_alloc_tx_ring:%d\n", port->disk_name, err);
+		goto err_free_ldc;
+	}
+
+	if (port->ldc_timeout)
+		mod_timer(&port->ldc_reset_timer,
+			  round_jiffies(jiffies + HZ * port->ldc_timeout));
+	mod_timer(&port->vio.timer, round_jiffies(jiffies + HZ));
+	return;
+
+err_free_ldc:
+	vio_ldc_free(&port->vio);
+}
+
 static const struct vio_device_id vdc_port_match[] = {
 	{
 		.type = "vdc-port",
@@ -927,9 +1093,13 @@
 {
 	int err;
 
+	sunvdc_wq = alloc_workqueue("sunvdc", 0, 0);
+	if (!sunvdc_wq)
+		return -ENOMEM;
+
 	err = register_blkdev(0, VDCBLK_NAME);
 	if (err < 0)
-		goto out_err;
+		goto out_free_wq;
 
 	vdc_major = err;
 
@@ -943,7 +1113,8 @@
 	unregister_blkdev(vdc_major, VDCBLK_NAME);
 	vdc_major = 0;
 
-out_err:
+out_free_wq:
+	destroy_workqueue(sunvdc_wq);
 	return err;
 }
 
@@ -951,6 +1122,7 @@
 {
 	vio_unregister_driver(&vdc_port_driver);
 	unregister_blkdev(vdc_major, VDCBLK_NAME);
+	destroy_workqueue(sunvdc_wq);
 }
 
 module_init(vdc_init);
diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig
index db1c9b7..6ed9e9f 100644
--- a/drivers/char/ipmi/Kconfig
+++ b/drivers/char/ipmi/Kconfig
@@ -62,6 +62,20 @@
 	 only be available on older systems if the "ipmi_si_intf.trydefaults=1"
 	 boot argument is passed.
 
+config IPMI_SSIF
+       tristate 'IPMI SMBus handler (SSIF)'
+       select I2C
+       help
+         Provides a driver for an SMBus interface to a BMC, meaning the BMC
+	 is accessed over an I2C bus instead of a standard system interface.
+	 This module requires I2C support.
+
+config IPMI_POWERNV
+       depends on PPC_POWERNV
+       tristate 'POWERNV (OPAL firmware) IPMI interface'
+       help
+         Provides a driver for OPAL firmware-based IPMI interfaces.
+
 config IPMI_WATCHDOG
        tristate 'IPMI Watchdog Timer'
        help
diff --git a/drivers/char/ipmi/Makefile b/drivers/char/ipmi/Makefile
index 16a9364..f3ffde1 100644
--- a/drivers/char/ipmi/Makefile
+++ b/drivers/char/ipmi/Makefile
@@ -7,5 +7,7 @@
 obj-$(CONFIG_IPMI_HANDLER) += ipmi_msghandler.o
 obj-$(CONFIG_IPMI_DEVICE_INTERFACE) += ipmi_devintf.o
 obj-$(CONFIG_IPMI_SI) += ipmi_si.o
+obj-$(CONFIG_IPMI_SSIF) += ipmi_ssif.o
+obj-$(CONFIG_IPMI_POWERNV) += ipmi_powernv.o
 obj-$(CONFIG_IPMI_WATCHDOG) += ipmi_watchdog.o
 obj-$(CONFIG_IPMI_POWEROFF) += ipmi_poweroff.o
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index f816211f..5fa83f7 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -56,6 +56,8 @@
 static void smi_recv_tasklet(unsigned long);
 static void handle_new_recv_msgs(ipmi_smi_t intf);
 static void need_waiter(ipmi_smi_t intf);
+static int handle_one_recv_msg(ipmi_smi_t          intf,
+			       struct ipmi_smi_msg *msg);
 
 static int initialized;
 
@@ -191,12 +193,12 @@
 #endif
 
 struct bmc_device {
-	struct platform_device *dev;
+	struct platform_device pdev;
 	struct ipmi_device_id  id;
 	unsigned char          guid[16];
 	int                    guid_set;
-
-	struct kref	       refcount;
+	char                   name[16];
+	struct kref	       usecount;
 
 	/* bmc device attributes */
 	struct device_attribute device_id_attr;
@@ -210,6 +212,7 @@
 	struct device_attribute guid_attr;
 	struct device_attribute aux_firmware_rev_attr;
 };
+#define to_bmc_device(x) container_of((x), struct bmc_device, pdev.dev)
 
 /*
  * Various statistics for IPMI, these index stats[] in the ipmi_smi
@@ -323,6 +326,9 @@
 
 	struct kref refcount;
 
+	/* Set when the interface is being unregistered. */
+	bool in_shutdown;
+
 	/* Used for a list of interfaces. */
 	struct list_head link;
 
@@ -341,7 +347,6 @@
 
 	struct bmc_device *bmc;
 	char *my_dev_name;
-	char *sysfs_name;
 
 	/*
 	 * This is the lower-layer's sender routine.  Note that you
@@ -377,11 +382,16 @@
 	 * periodic timer interrupt.  The tasklet is for handling received
 	 * messages directly from the handler.
 	 */
-	spinlock_t       waiting_msgs_lock;
-	struct list_head waiting_msgs;
+	spinlock_t       waiting_rcv_msgs_lock;
+	struct list_head waiting_rcv_msgs;
 	atomic_t	 watchdog_pretimeouts_to_deliver;
 	struct tasklet_struct recv_tasklet;
 
+	spinlock_t             xmit_msgs_lock;
+	struct list_head       xmit_msgs;
+	struct ipmi_smi_msg    *curr_msg;
+	struct list_head       hp_xmit_msgs;
+
 	/*
 	 * The list of command receivers that are registered for commands
 	 * on this interface.
@@ -474,6 +484,18 @@
 #define ipmi_get_stat(intf, stat) \
 	((unsigned int) atomic_read(&(intf)->stats[IPMI_STAT_ ## stat]))
 
+static char *addr_src_to_str[] = { "invalid", "hotmod", "hardcoded", "SPMI",
+				   "ACPI", "SMBIOS", "PCI",
+				   "device-tree", "default" };
+
+const char *ipmi_addr_src_to_str(enum ipmi_addr_src src)
+{
+	if (src > SI_DEFAULT)
+		src = 0; /* Invalid */
+	return addr_src_to_str[src];
+}
+EXPORT_SYMBOL(ipmi_addr_src_to_str);
+
 static int is_lan_addr(struct ipmi_addr *addr)
 {
 	return addr->addr_type == IPMI_LAN_ADDR_TYPE;
@@ -517,7 +539,7 @@
 
 	tasklet_kill(&intf->recv_tasklet);
 
-	free_smi_msg_list(&intf->waiting_msgs);
+	free_smi_msg_list(&intf->waiting_rcv_msgs);
 	free_recv_msg_list(&intf->waiting_events);
 
 	/*
@@ -1473,6 +1495,30 @@
 	smi_msg->msgid = msgid;
 }
 
+static void smi_send(ipmi_smi_t intf, struct ipmi_smi_handlers *handlers,
+		     struct ipmi_smi_msg *smi_msg, int priority)
+{
+	int run_to_completion = intf->run_to_completion;
+	unsigned long flags;
+
+	if (!run_to_completion)
+		spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
+	if (intf->curr_msg) {
+		if (priority > 0)
+			list_add_tail(&smi_msg->link, &intf->hp_xmit_msgs);
+		else
+			list_add_tail(&smi_msg->link, &intf->xmit_msgs);
+		smi_msg = NULL;
+	} else {
+		intf->curr_msg = smi_msg;
+	}
+	if (!run_to_completion)
+		spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
+
+	if (smi_msg)
+		handlers->sender(intf->send_info, smi_msg);
+}
+
 /*
  * Separate from ipmi_request so that the user does not have to be
  * supplied in certain circumstances (mainly at panic time).  If
@@ -1497,7 +1543,6 @@
 	struct ipmi_smi_msg      *smi_msg;
 	struct ipmi_recv_msg     *recv_msg;
 	unsigned long            flags;
-	struct ipmi_smi_handlers *handlers;
 
 
 	if (supplied_recv)
@@ -1520,8 +1565,7 @@
 	}
 
 	rcu_read_lock();
-	handlers = intf->handlers;
-	if (!handlers) {
+	if (intf->in_shutdown) {
 		rv = -ENODEV;
 		goto out_err;
 	}
@@ -1856,7 +1900,7 @@
 	}
 #endif
 
-	handlers->sender(intf->send_info, smi_msg, priority);
+	smi_send(intf, intf->handlers, smi_msg, priority);
 	rcu_read_unlock();
 
 	return 0;
@@ -2153,7 +2197,7 @@
 static int __find_bmc_guid(struct device *dev, void *data)
 {
 	unsigned char *id = data;
-	struct bmc_device *bmc = dev_get_drvdata(dev);
+	struct bmc_device *bmc = to_bmc_device(dev);
 	return memcmp(bmc->guid, id, 16) == 0;
 }
 
@@ -2164,7 +2208,7 @@
 
 	dev = driver_find_device(drv, NULL, guid, __find_bmc_guid);
 	if (dev)
-		return dev_get_drvdata(dev);
+		return to_bmc_device(dev);
 	else
 		return NULL;
 }
@@ -2177,7 +2221,7 @@
 static int __find_bmc_prod_dev_id(struct device *dev, void *data)
 {
 	struct prod_dev_id *id = data;
-	struct bmc_device *bmc = dev_get_drvdata(dev);
+	struct bmc_device *bmc = to_bmc_device(dev);
 
 	return (bmc->id.product_id == id->product_id
 		&& bmc->id.device_id == id->device_id);
@@ -2195,7 +2239,7 @@
 
 	dev = driver_find_device(drv, NULL, &id, __find_bmc_prod_dev_id);
 	if (dev)
-		return dev_get_drvdata(dev);
+		return to_bmc_device(dev);
 	else
 		return NULL;
 }
@@ -2204,84 +2248,92 @@
 			      struct device_attribute *attr,
 			      char *buf)
 {
-	struct bmc_device *bmc = dev_get_drvdata(dev);
+	struct bmc_device *bmc = to_bmc_device(dev);
 
 	return snprintf(buf, 10, "%u\n", bmc->id.device_id);
 }
+DEVICE_ATTR(device_id, S_IRUGO, device_id_show, NULL);
 
-static ssize_t provides_dev_sdrs_show(struct device *dev,
-				      struct device_attribute *attr,
-				      char *buf)
+static ssize_t provides_device_sdrs_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
 {
-	struct bmc_device *bmc = dev_get_drvdata(dev);
+	struct bmc_device *bmc = to_bmc_device(dev);
 
 	return snprintf(buf, 10, "%u\n",
 			(bmc->id.device_revision & 0x80) >> 7);
 }
+DEVICE_ATTR(provides_device_sdrs, S_IRUGO, provides_device_sdrs_show, NULL);
 
 static ssize_t revision_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
-	struct bmc_device *bmc = dev_get_drvdata(dev);
+	struct bmc_device *bmc = to_bmc_device(dev);
 
 	return snprintf(buf, 20, "%u\n",
 			bmc->id.device_revision & 0x0F);
 }
+DEVICE_ATTR(revision, S_IRUGO, revision_show, NULL);
 
-static ssize_t firmware_rev_show(struct device *dev,
-				 struct device_attribute *attr,
-				 char *buf)
+static ssize_t firmware_revision_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
 {
-	struct bmc_device *bmc = dev_get_drvdata(dev);
+	struct bmc_device *bmc = to_bmc_device(dev);
 
 	return snprintf(buf, 20, "%u.%x\n", bmc->id.firmware_revision_1,
 			bmc->id.firmware_revision_2);
 }
+DEVICE_ATTR(firmware_revision, S_IRUGO, firmware_revision_show, NULL);
 
 static ssize_t ipmi_version_show(struct device *dev,
 				 struct device_attribute *attr,
 				 char *buf)
 {
-	struct bmc_device *bmc = dev_get_drvdata(dev);
+	struct bmc_device *bmc = to_bmc_device(dev);
 
 	return snprintf(buf, 20, "%u.%u\n",
 			ipmi_version_major(&bmc->id),
 			ipmi_version_minor(&bmc->id));
 }
+DEVICE_ATTR(ipmi_version, S_IRUGO, ipmi_version_show, NULL);
 
 static ssize_t add_dev_support_show(struct device *dev,
 				    struct device_attribute *attr,
 				    char *buf)
 {
-	struct bmc_device *bmc = dev_get_drvdata(dev);
+	struct bmc_device *bmc = to_bmc_device(dev);
 
 	return snprintf(buf, 10, "0x%02x\n",
 			bmc->id.additional_device_support);
 }
+DEVICE_ATTR(additional_device_support, S_IRUGO, add_dev_support_show, NULL);
 
 static ssize_t manufacturer_id_show(struct device *dev,
 				    struct device_attribute *attr,
 				    char *buf)
 {
-	struct bmc_device *bmc = dev_get_drvdata(dev);
+	struct bmc_device *bmc = to_bmc_device(dev);
 
 	return snprintf(buf, 20, "0x%6.6x\n", bmc->id.manufacturer_id);
 }
+DEVICE_ATTR(manufacturer_id, S_IRUGO, manufacturer_id_show, NULL);
 
 static ssize_t product_id_show(struct device *dev,
 			       struct device_attribute *attr,
 			       char *buf)
 {
-	struct bmc_device *bmc = dev_get_drvdata(dev);
+	struct bmc_device *bmc = to_bmc_device(dev);
 
 	return snprintf(buf, 10, "0x%4.4x\n", bmc->id.product_id);
 }
+DEVICE_ATTR(product_id, S_IRUGO, product_id_show, NULL);
 
 static ssize_t aux_firmware_rev_show(struct device *dev,
 				     struct device_attribute *attr,
 				     char *buf)
 {
-	struct bmc_device *bmc = dev_get_drvdata(dev);
+	struct bmc_device *bmc = to_bmc_device(dev);
 
 	return snprintf(buf, 21, "0x%02x 0x%02x 0x%02x 0x%02x\n",
 			bmc->id.aux_firmware_revision[3],
@@ -2289,174 +2341,96 @@
 			bmc->id.aux_firmware_revision[1],
 			bmc->id.aux_firmware_revision[0]);
 }
+DEVICE_ATTR(aux_firmware_revision, S_IRUGO, aux_firmware_rev_show, NULL);
 
 static ssize_t guid_show(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
-	struct bmc_device *bmc = dev_get_drvdata(dev);
+	struct bmc_device *bmc = to_bmc_device(dev);
 
 	return snprintf(buf, 100, "%Lx%Lx\n",
 			(long long) bmc->guid[0],
 			(long long) bmc->guid[8]);
 }
+DEVICE_ATTR(guid, S_IRUGO, guid_show, NULL);
 
-static void remove_files(struct bmc_device *bmc)
+static struct attribute *bmc_dev_attrs[] = {
+	&dev_attr_device_id.attr,
+	&dev_attr_provides_device_sdrs.attr,
+	&dev_attr_revision.attr,
+	&dev_attr_firmware_revision.attr,
+	&dev_attr_ipmi_version.attr,
+	&dev_attr_additional_device_support.attr,
+	&dev_attr_manufacturer_id.attr,
+	&dev_attr_product_id.attr,
+	NULL
+};
+
+static struct attribute_group bmc_dev_attr_group = {
+	.attrs		= bmc_dev_attrs,
+};
+
+static const struct attribute_group *bmc_dev_attr_groups[] = {
+	&bmc_dev_attr_group,
+	NULL
+};
+
+static struct device_type bmc_device_type = {
+	.groups		= bmc_dev_attr_groups,
+};
+
+static void
+release_bmc_device(struct device *dev)
 {
-	if (!bmc->dev)
-		return;
-
-	device_remove_file(&bmc->dev->dev,
-			   &bmc->device_id_attr);
-	device_remove_file(&bmc->dev->dev,
-			   &bmc->provides_dev_sdrs_attr);
-	device_remove_file(&bmc->dev->dev,
-			   &bmc->revision_attr);
-	device_remove_file(&bmc->dev->dev,
-			   &bmc->firmware_rev_attr);
-	device_remove_file(&bmc->dev->dev,
-			   &bmc->version_attr);
-	device_remove_file(&bmc->dev->dev,
-			   &bmc->add_dev_support_attr);
-	device_remove_file(&bmc->dev->dev,
-			   &bmc->manufacturer_id_attr);
-	device_remove_file(&bmc->dev->dev,
-			   &bmc->product_id_attr);
-
-	if (bmc->id.aux_firmware_revision_set)
-		device_remove_file(&bmc->dev->dev,
-				   &bmc->aux_firmware_rev_attr);
-	if (bmc->guid_set)
-		device_remove_file(&bmc->dev->dev,
-				   &bmc->guid_attr);
+	kfree(to_bmc_device(dev));
 }
 
 static void
 cleanup_bmc_device(struct kref *ref)
 {
-	struct bmc_device *bmc;
+	struct bmc_device *bmc = container_of(ref, struct bmc_device, usecount);
 
-	bmc = container_of(ref, struct bmc_device, refcount);
+	if (bmc->id.aux_firmware_revision_set)
+		device_remove_file(&bmc->pdev.dev,
+				   &bmc->aux_firmware_rev_attr);
+	if (bmc->guid_set)
+		device_remove_file(&bmc->pdev.dev,
+				   &bmc->guid_attr);
 
-	remove_files(bmc);
-	platform_device_unregister(bmc->dev);
-	kfree(bmc);
+	platform_device_unregister(&bmc->pdev);
 }
 
 static void ipmi_bmc_unregister(ipmi_smi_t intf)
 {
 	struct bmc_device *bmc = intf->bmc;
 
-	if (intf->sysfs_name) {
-		sysfs_remove_link(&intf->si_dev->kobj, intf->sysfs_name);
-		kfree(intf->sysfs_name);
-		intf->sysfs_name = NULL;
-	}
+	sysfs_remove_link(&intf->si_dev->kobj, "bmc");
 	if (intf->my_dev_name) {
-		sysfs_remove_link(&bmc->dev->dev.kobj, intf->my_dev_name);
+		sysfs_remove_link(&bmc->pdev.dev.kobj, intf->my_dev_name);
 		kfree(intf->my_dev_name);
 		intf->my_dev_name = NULL;
 	}
 
 	mutex_lock(&ipmidriver_mutex);
-	kref_put(&bmc->refcount, cleanup_bmc_device);
+	kref_put(&bmc->usecount, cleanup_bmc_device);
 	intf->bmc = NULL;
 	mutex_unlock(&ipmidriver_mutex);
 }
 
-static int create_files(struct bmc_device *bmc)
+static int create_bmc_files(struct bmc_device *bmc)
 {
 	int err;
 
-	bmc->device_id_attr.attr.name = "device_id";
-	bmc->device_id_attr.attr.mode = S_IRUGO;
-	bmc->device_id_attr.show = device_id_show;
-	sysfs_attr_init(&bmc->device_id_attr.attr);
-
-	bmc->provides_dev_sdrs_attr.attr.name = "provides_device_sdrs";
-	bmc->provides_dev_sdrs_attr.attr.mode = S_IRUGO;
-	bmc->provides_dev_sdrs_attr.show = provides_dev_sdrs_show;
-	sysfs_attr_init(&bmc->provides_dev_sdrs_attr.attr);
-
-	bmc->revision_attr.attr.name = "revision";
-	bmc->revision_attr.attr.mode = S_IRUGO;
-	bmc->revision_attr.show = revision_show;
-	sysfs_attr_init(&bmc->revision_attr.attr);
-
-	bmc->firmware_rev_attr.attr.name = "firmware_revision";
-	bmc->firmware_rev_attr.attr.mode = S_IRUGO;
-	bmc->firmware_rev_attr.show = firmware_rev_show;
-	sysfs_attr_init(&bmc->firmware_rev_attr.attr);
-
-	bmc->version_attr.attr.name = "ipmi_version";
-	bmc->version_attr.attr.mode = S_IRUGO;
-	bmc->version_attr.show = ipmi_version_show;
-	sysfs_attr_init(&bmc->version_attr.attr);
-
-	bmc->add_dev_support_attr.attr.name = "additional_device_support";
-	bmc->add_dev_support_attr.attr.mode = S_IRUGO;
-	bmc->add_dev_support_attr.show = add_dev_support_show;
-	sysfs_attr_init(&bmc->add_dev_support_attr.attr);
-
-	bmc->manufacturer_id_attr.attr.name = "manufacturer_id";
-	bmc->manufacturer_id_attr.attr.mode = S_IRUGO;
-	bmc->manufacturer_id_attr.show = manufacturer_id_show;
-	sysfs_attr_init(&bmc->manufacturer_id_attr.attr);
-
-	bmc->product_id_attr.attr.name = "product_id";
-	bmc->product_id_attr.attr.mode = S_IRUGO;
-	bmc->product_id_attr.show = product_id_show;
-	sysfs_attr_init(&bmc->product_id_attr.attr);
-
-	bmc->guid_attr.attr.name = "guid";
-	bmc->guid_attr.attr.mode = S_IRUGO;
-	bmc->guid_attr.show = guid_show;
-	sysfs_attr_init(&bmc->guid_attr.attr);
-
-	bmc->aux_firmware_rev_attr.attr.name = "aux_firmware_revision";
-	bmc->aux_firmware_rev_attr.attr.mode = S_IRUGO;
-	bmc->aux_firmware_rev_attr.show = aux_firmware_rev_show;
-	sysfs_attr_init(&bmc->aux_firmware_rev_attr.attr);
-
-	err = device_create_file(&bmc->dev->dev,
-			   &bmc->device_id_attr);
-	if (err)
-		goto out;
-	err = device_create_file(&bmc->dev->dev,
-			   &bmc->provides_dev_sdrs_attr);
-	if (err)
-		goto out_devid;
-	err = device_create_file(&bmc->dev->dev,
-			   &bmc->revision_attr);
-	if (err)
-		goto out_sdrs;
-	err = device_create_file(&bmc->dev->dev,
-			   &bmc->firmware_rev_attr);
-	if (err)
-		goto out_rev;
-	err = device_create_file(&bmc->dev->dev,
-			   &bmc->version_attr);
-	if (err)
-		goto out_firm;
-	err = device_create_file(&bmc->dev->dev,
-			   &bmc->add_dev_support_attr);
-	if (err)
-		goto out_version;
-	err = device_create_file(&bmc->dev->dev,
-			   &bmc->manufacturer_id_attr);
-	if (err)
-		goto out_add_dev;
-	err = device_create_file(&bmc->dev->dev,
-			   &bmc->product_id_attr);
-	if (err)
-		goto out_manu;
 	if (bmc->id.aux_firmware_revision_set) {
-		err = device_create_file(&bmc->dev->dev,
+		bmc->aux_firmware_rev_attr.attr.name = "aux_firmware_revision";
+		err = device_create_file(&bmc->pdev.dev,
 				   &bmc->aux_firmware_rev_attr);
 		if (err)
-			goto out_prod_id;
+			goto out;
 	}
 	if (bmc->guid_set) {
-		err = device_create_file(&bmc->dev->dev,
+		bmc->guid_attr.attr.name = "guid";
+		err = device_create_file(&bmc->pdev.dev,
 				   &bmc->guid_attr);
 		if (err)
 			goto out_aux_firm;
@@ -2466,44 +2440,17 @@
 
 out_aux_firm:
 	if (bmc->id.aux_firmware_revision_set)
-		device_remove_file(&bmc->dev->dev,
+		device_remove_file(&bmc->pdev.dev,
 				   &bmc->aux_firmware_rev_attr);
-out_prod_id:
-	device_remove_file(&bmc->dev->dev,
-			   &bmc->product_id_attr);
-out_manu:
-	device_remove_file(&bmc->dev->dev,
-			   &bmc->manufacturer_id_attr);
-out_add_dev:
-	device_remove_file(&bmc->dev->dev,
-			   &bmc->add_dev_support_attr);
-out_version:
-	device_remove_file(&bmc->dev->dev,
-			   &bmc->version_attr);
-out_firm:
-	device_remove_file(&bmc->dev->dev,
-			   &bmc->firmware_rev_attr);
-out_rev:
-	device_remove_file(&bmc->dev->dev,
-			   &bmc->revision_attr);
-out_sdrs:
-	device_remove_file(&bmc->dev->dev,
-			   &bmc->provides_dev_sdrs_attr);
-out_devid:
-	device_remove_file(&bmc->dev->dev,
-			   &bmc->device_id_attr);
 out:
 	return err;
 }
 
-static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum,
-			     const char *sysfs_name)
+static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum)
 {
 	int               rv;
 	struct bmc_device *bmc = intf->bmc;
 	struct bmc_device *old_bmc;
-	int               size;
-	char              dummy[1];
 
 	mutex_lock(&ipmidriver_mutex);
 
@@ -2527,7 +2474,7 @@
 		intf->bmc = old_bmc;
 		bmc = old_bmc;
 
-		kref_get(&bmc->refcount);
+		kref_get(&bmc->usecount);
 		mutex_unlock(&ipmidriver_mutex);
 
 		printk(KERN_INFO
@@ -2537,12 +2484,12 @@
 		       bmc->id.product_id,
 		       bmc->id.device_id);
 	} else {
-		char name[14];
 		unsigned char orig_dev_id = bmc->id.device_id;
 		int warn_printed = 0;
 
-		snprintf(name, sizeof(name),
+		snprintf(bmc->name, sizeof(bmc->name),
 			 "ipmi_bmc.%4.4x", bmc->id.product_id);
+		bmc->pdev.name = bmc->name;
 
 		while (ipmi_find_bmc_prod_dev_id(&ipmidriver.driver,
 						 bmc->id.product_id,
@@ -2566,23 +2513,16 @@
 			}
 		}
 
-		bmc->dev = platform_device_alloc(name, bmc->id.device_id);
-		if (!bmc->dev) {
-			mutex_unlock(&ipmidriver_mutex);
-			printk(KERN_ERR
-			       "ipmi_msghandler:"
-			       " Unable to allocate platform device\n");
-			return -ENOMEM;
-		}
-		bmc->dev->dev.driver = &ipmidriver.driver;
-		dev_set_drvdata(&bmc->dev->dev, bmc);
-		kref_init(&bmc->refcount);
+		bmc->pdev.dev.driver = &ipmidriver.driver;
+		bmc->pdev.id = bmc->id.device_id;
+		bmc->pdev.dev.release = release_bmc_device;
+		bmc->pdev.dev.type = &bmc_device_type;
+		kref_init(&bmc->usecount);
 
-		rv = platform_device_add(bmc->dev);
+		rv = platform_device_register(&bmc->pdev);
 		mutex_unlock(&ipmidriver_mutex);
 		if (rv) {
-			platform_device_put(bmc->dev);
-			bmc->dev = NULL;
+			put_device(&bmc->pdev.dev);
 			printk(KERN_ERR
 			       "ipmi_msghandler:"
 			       " Unable to register bmc device: %d\n",
@@ -2594,10 +2534,10 @@
 			return rv;
 		}
 
-		rv = create_files(bmc);
+		rv = create_bmc_files(bmc);
 		if (rv) {
 			mutex_lock(&ipmidriver_mutex);
-			platform_device_unregister(bmc->dev);
+			platform_device_unregister(&bmc->pdev);
 			mutex_unlock(&ipmidriver_mutex);
 
 			return rv;
@@ -2614,44 +2554,26 @@
 	 * create symlink from system interface device to bmc device
 	 * and back.
 	 */
-	intf->sysfs_name = kstrdup(sysfs_name, GFP_KERNEL);
-	if (!intf->sysfs_name) {
-		rv = -ENOMEM;
-		printk(KERN_ERR
-		       "ipmi_msghandler: allocate link to BMC: %d\n",
-		       rv);
-		goto out_err;
-	}
-
-	rv = sysfs_create_link(&intf->si_dev->kobj,
-			       &bmc->dev->dev.kobj, intf->sysfs_name);
+	rv = sysfs_create_link(&intf->si_dev->kobj, &bmc->pdev.dev.kobj, "bmc");
 	if (rv) {
-		kfree(intf->sysfs_name);
-		intf->sysfs_name = NULL;
 		printk(KERN_ERR
 		       "ipmi_msghandler: Unable to create bmc symlink: %d\n",
 		       rv);
 		goto out_err;
 	}
 
-	size = snprintf(dummy, 0, "ipmi%d", ifnum);
-	intf->my_dev_name = kmalloc(size+1, GFP_KERNEL);
+	intf->my_dev_name = kasprintf(GFP_KERNEL, "ipmi%d", ifnum);
 	if (!intf->my_dev_name) {
-		kfree(intf->sysfs_name);
-		intf->sysfs_name = NULL;
 		rv = -ENOMEM;
 		printk(KERN_ERR
 		       "ipmi_msghandler: allocate link from BMC: %d\n",
 		       rv);
 		goto out_err;
 	}
-	snprintf(intf->my_dev_name, size+1, "ipmi%d", ifnum);
 
-	rv = sysfs_create_link(&bmc->dev->dev.kobj, &intf->si_dev->kobj,
+	rv = sysfs_create_link(&bmc->pdev.dev.kobj, &intf->si_dev->kobj,
 			       intf->my_dev_name);
 	if (rv) {
-		kfree(intf->sysfs_name);
-		intf->sysfs_name = NULL;
 		kfree(intf->my_dev_name);
 		intf->my_dev_name = NULL;
 		printk(KERN_ERR
@@ -2850,7 +2772,6 @@
 		      void		       *send_info,
 		      struct ipmi_device_id    *device_id,
 		      struct device            *si_dev,
-		      const char               *sysfs_name,
 		      unsigned char            slave_addr)
 {
 	int              i, j;
@@ -2909,12 +2830,15 @@
 #ifdef CONFIG_PROC_FS
 	mutex_init(&intf->proc_entry_lock);
 #endif
-	spin_lock_init(&intf->waiting_msgs_lock);
-	INIT_LIST_HEAD(&intf->waiting_msgs);
+	spin_lock_init(&intf->waiting_rcv_msgs_lock);
+	INIT_LIST_HEAD(&intf->waiting_rcv_msgs);
 	tasklet_init(&intf->recv_tasklet,
 		     smi_recv_tasklet,
 		     (unsigned long) intf);
 	atomic_set(&intf->watchdog_pretimeouts_to_deliver, 0);
+	spin_lock_init(&intf->xmit_msgs_lock);
+	INIT_LIST_HEAD(&intf->xmit_msgs);
+	INIT_LIST_HEAD(&intf->hp_xmit_msgs);
 	spin_lock_init(&intf->events_lock);
 	atomic_set(&intf->event_waiters, 0);
 	intf->ticks_to_req_ev = IPMI_REQUEST_EV_TIME;
@@ -2984,7 +2908,7 @@
 	if (rv == 0)
 		rv = add_proc_entries(intf, i);
 
-	rv = ipmi_bmc_register(intf, i, sysfs_name);
+	rv = ipmi_bmc_register(intf, i);
 
  out:
 	if (rv) {
@@ -3014,12 +2938,50 @@
 }
 EXPORT_SYMBOL(ipmi_register_smi);
 
+static void deliver_smi_err_response(ipmi_smi_t intf,
+				     struct ipmi_smi_msg *msg,
+				     unsigned char err)
+{
+	msg->rsp[0] = msg->data[0] | 4;
+	msg->rsp[1] = msg->data[1];
+	msg->rsp[2] = err;
+	msg->rsp_size = 3;
+	/* It's an error, so it will never requeue, no need to check return. */
+	handle_one_recv_msg(intf, msg);
+}
+
 static void cleanup_smi_msgs(ipmi_smi_t intf)
 {
 	int              i;
 	struct seq_table *ent;
+	struct ipmi_smi_msg *msg;
+	struct list_head *entry;
+	struct list_head tmplist;
+
+	/* Clear out our transmit queues and hold the messages. */
+	INIT_LIST_HEAD(&tmplist);
+	list_splice_tail(&intf->hp_xmit_msgs, &tmplist);
+	list_splice_tail(&intf->xmit_msgs, &tmplist);
+
+	/* Current message first, to preserve order */
+	while (intf->curr_msg && !list_empty(&intf->waiting_rcv_msgs)) {
+		/* Wait for the message to clear out. */
+		schedule_timeout(1);
+	}
 
 	/* No need for locks, the interface is down. */
+
+	/*
+	 * Return errors for all pending messages in queue and in the
+	 * tables waiting for remote responses.
+	 */
+	while (!list_empty(&tmplist)) {
+		entry = tmplist.next;
+		list_del(entry);
+		msg = list_entry(entry, struct ipmi_smi_msg, link);
+		deliver_smi_err_response(intf, msg, IPMI_ERR_UNSPECIFIED);
+	}
+
 	for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) {
 		ent = &(intf->seq_table[i]);
 		if (!ent->inuse)
@@ -3031,20 +2993,33 @@
 int ipmi_unregister_smi(ipmi_smi_t intf)
 {
 	struct ipmi_smi_watcher *w;
-	int    intf_num = intf->intf_num;
+	int intf_num = intf->intf_num;
+	ipmi_user_t user;
 
 	ipmi_bmc_unregister(intf);
 
 	mutex_lock(&smi_watchers_mutex);
 	mutex_lock(&ipmi_interfaces_mutex);
 	intf->intf_num = -1;
-	intf->handlers = NULL;
+	intf->in_shutdown = true;
 	list_del_rcu(&intf->link);
 	mutex_unlock(&ipmi_interfaces_mutex);
 	synchronize_rcu();
 
 	cleanup_smi_msgs(intf);
 
+	/* Clean up the effects of users on the lower-level software. */
+	mutex_lock(&ipmi_interfaces_mutex);
+	rcu_read_lock();
+	list_for_each_entry_rcu(user, &intf->users, link) {
+		module_put(intf->handlers->owner);
+		if (intf->handlers->dec_usecount)
+			intf->handlers->dec_usecount(intf->send_info);
+	}
+	rcu_read_unlock();
+	intf->handlers = NULL;
+	mutex_unlock(&ipmi_interfaces_mutex);
+
 	remove_proc_entries(intf);
 
 	/*
@@ -3134,7 +3109,6 @@
 	ipmi_user_t              user = NULL;
 	struct ipmi_ipmb_addr    *ipmb_addr;
 	struct ipmi_recv_msg     *recv_msg;
-	struct ipmi_smi_handlers *handlers;
 
 	if (msg->rsp_size < 10) {
 		/* Message not big enough, just ignore it. */
@@ -3188,9 +3162,8 @@
 	}
 #endif
 		rcu_read_lock();
-		handlers = intf->handlers;
-		if (handlers) {
-			handlers->sender(intf->send_info, msg, 0);
+		if (!intf->in_shutdown) {
+			smi_send(intf, intf->handlers, msg, 0);
 			/*
 			 * We used the message, so return the value
 			 * that causes it to not be freed or
@@ -3857,32 +3830,32 @@
 
 	/* See if any waiting messages need to be processed. */
 	if (!run_to_completion)
-		spin_lock_irqsave(&intf->waiting_msgs_lock, flags);
-	while (!list_empty(&intf->waiting_msgs)) {
-		smi_msg = list_entry(intf->waiting_msgs.next,
+		spin_lock_irqsave(&intf->waiting_rcv_msgs_lock, flags);
+	while (!list_empty(&intf->waiting_rcv_msgs)) {
+		smi_msg = list_entry(intf->waiting_rcv_msgs.next,
 				     struct ipmi_smi_msg, link);
-		list_del(&smi_msg->link);
 		if (!run_to_completion)
-			spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags);
+			spin_unlock_irqrestore(&intf->waiting_rcv_msgs_lock,
+					       flags);
 		rv = handle_one_recv_msg(intf, smi_msg);
 		if (!run_to_completion)
-			spin_lock_irqsave(&intf->waiting_msgs_lock, flags);
-		if (rv == 0) {
-			/* Message handled */
-			ipmi_free_smi_msg(smi_msg);
-		} else if (rv < 0) {
-			/* Fatal error on the message, del but don't free. */
-		} else {
+			spin_lock_irqsave(&intf->waiting_rcv_msgs_lock, flags);
+		if (rv > 0) {
 			/*
 			 * To preserve message order, quit if we
 			 * can't handle a message.
 			 */
-			list_add(&smi_msg->link, &intf->waiting_msgs);
 			break;
+		} else {
+			list_del(&smi_msg->link);
+			if (rv == 0)
+				/* Message handled */
+				ipmi_free_smi_msg(smi_msg);
+			/* If rv < 0, fatal error, del but don't free. */
 		}
 	}
 	if (!run_to_completion)
-		spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags);
+		spin_unlock_irqrestore(&intf->waiting_rcv_msgs_lock, flags);
 
 	/*
 	 * If the pretimout count is non-zero, decrement one from it and
@@ -3903,7 +3876,41 @@
 
 static void smi_recv_tasklet(unsigned long val)
 {
-	handle_new_recv_msgs((ipmi_smi_t) val);
+	unsigned long flags = 0; /* keep us warning-free. */
+	ipmi_smi_t intf = (ipmi_smi_t) val;
+	int run_to_completion = intf->run_to_completion;
+	struct ipmi_smi_msg *newmsg = NULL;
+
+	/*
+	 * Start the next message if available.
+	 *
+	 * Do this here, not in the actual receiver, because we may deadlock
+	 * because the lower layer is allowed to hold locks while calling
+	 * message delivery.
+	 */
+	if (!run_to_completion)
+		spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
+	if (intf->curr_msg == NULL && !intf->in_shutdown) {
+		struct list_head *entry = NULL;
+
+		/* Pick the high priority queue first. */
+		if (!list_empty(&intf->hp_xmit_msgs))
+			entry = intf->hp_xmit_msgs.next;
+		else if (!list_empty(&intf->xmit_msgs))
+			entry = intf->xmit_msgs.next;
+
+		if (entry) {
+			list_del(entry);
+			newmsg = list_entry(entry, struct ipmi_smi_msg, link);
+			intf->curr_msg = newmsg;
+		}
+	}
+	if (!run_to_completion)
+		spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
+	if (newmsg)
+		intf->handlers->sender(intf->send_info, newmsg);
+
+	handle_new_recv_msgs(intf);
 }
 
 /* Handle a new message from the lower layer. */
@@ -3911,13 +3918,16 @@
 			   struct ipmi_smi_msg *msg)
 {
 	unsigned long flags = 0; /* keep us warning-free. */
-	int           run_to_completion;
-
+	int run_to_completion = intf->run_to_completion;
 
 	if ((msg->data_size >= 2)
 	    && (msg->data[0] == (IPMI_NETFN_APP_REQUEST << 2))
 	    && (msg->data[1] == IPMI_SEND_MSG_CMD)
 	    && (msg->user_data == NULL)) {
+
+		if (intf->in_shutdown)
+			goto free_msg;
+
 		/*
 		 * This is the local response to a command send, start
 		 * the timer for these.  The user_data will not be
@@ -3953,29 +3963,40 @@
 			/* The message was sent, start the timer. */
 			intf_start_seq_timer(intf, msg->msgid);
 
+free_msg:
 		ipmi_free_smi_msg(msg);
-		goto out;
+	} else {
+		/*
+		 * To preserve message order, we keep a queue and deliver from
+		 * a tasklet.
+		 */
+		if (!run_to_completion)
+			spin_lock_irqsave(&intf->waiting_rcv_msgs_lock, flags);
+		list_add_tail(&msg->link, &intf->waiting_rcv_msgs);
+		if (!run_to_completion)
+			spin_unlock_irqrestore(&intf->waiting_rcv_msgs_lock,
+					       flags);
 	}
 
-	/*
-	 * To preserve message order, if the list is not empty, we
-	 * tack this message onto the end of the list.
-	 */
-	run_to_completion = intf->run_to_completion;
 	if (!run_to_completion)
-		spin_lock_irqsave(&intf->waiting_msgs_lock, flags);
-	list_add_tail(&msg->link, &intf->waiting_msgs);
+		spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
+	if (msg == intf->curr_msg)
+		intf->curr_msg = NULL;
 	if (!run_to_completion)
-		spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags);
+		spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
 
-	tasklet_schedule(&intf->recv_tasklet);
- out:
-	return;
+	if (run_to_completion)
+		smi_recv_tasklet((unsigned long) intf);
+	else
+		tasklet_schedule(&intf->recv_tasklet);
 }
 EXPORT_SYMBOL(ipmi_smi_msg_received);
 
 void ipmi_smi_watchdog_pretimeout(ipmi_smi_t intf)
 {
+	if (intf->in_shutdown)
+		return;
+
 	atomic_set(&intf->watchdog_pretimeouts_to_deliver, 1);
 	tasklet_schedule(&intf->recv_tasklet);
 }
@@ -4017,7 +4038,7 @@
 	struct ipmi_recv_msg     *msg;
 	struct ipmi_smi_handlers *handlers;
 
-	if (intf->intf_num == -1)
+	if (intf->in_shutdown)
 		return;
 
 	if (!ent->inuse)
@@ -4082,8 +4103,7 @@
 				ipmi_inc_stat(intf,
 					      retransmitted_ipmb_commands);
 
-			intf->handlers->sender(intf->send_info,
-					       smi_msg, 0);
+			smi_send(intf, intf->handlers, smi_msg, 0);
 		} else
 			ipmi_free_smi_msg(smi_msg);
 
@@ -4145,15 +4165,12 @@
 
 static void ipmi_request_event(ipmi_smi_t intf)
 {
-	struct ipmi_smi_handlers *handlers;
-
 	/* No event requests when in maintenance mode. */
 	if (intf->maintenance_mode_enable)
 		return;
 
-	handlers = intf->handlers;
-	if (handlers)
-		handlers->request_events(intf->send_info);
+	if (!intf->in_shutdown)
+		intf->handlers->request_events(intf->send_info);
 }
 
 static struct timer_list ipmi_timer;
@@ -4548,6 +4565,7 @@
 	proc_ipmi_root = proc_mkdir("ipmi", NULL);
 	if (!proc_ipmi_root) {
 	    printk(KERN_ERR PFX "Unable to create IPMI proc dir");
+	    driver_unregister(&ipmidriver.driver);
 	    return -ENOMEM;
 	}
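As an aside, deliver_smi_err_response() above (and send_error_reply() in the PowerNV driver below) synthesize a local error reply the same way: echo the request's netfn byte with the response bit set ("| 4" flips the encoded netfn from request to response), echo the command byte, and append a completion code. A minimal standalone sketch of that convention, using illustrative names and trimmed buffer sizes rather than the driver's real types:

struct demo_msg {
	unsigned char data[4];	/* request: data[0] = netfn << 2, data[1] = cmd */
	unsigned char rsp[4];	/* error reply is built in place */
	int rsp_size;
};

void demo_fill_err_rsp(struct demo_msg *msg, unsigned char cc)
{
	msg->rsp[0] = msg->data[0] | 4;	/* request netfn -> response netfn */
	msg->rsp[1] = msg->data[1];	/* echo the command byte */
	msg->rsp[2] = cc;		/* completion code, e.g. IPMI_ERR_UNSPECIFIED */
	msg->rsp_size = 3;
}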
 
diff --git a/drivers/char/ipmi/ipmi_powernv.c b/drivers/char/ipmi/ipmi_powernv.c
new file mode 100644
index 0000000..79524ed
--- /dev/null
+++ b/drivers/char/ipmi/ipmi_powernv.c
@@ -0,0 +1,310 @@
+/*
+ * PowerNV OPAL IPMI driver
+ *
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#define pr_fmt(fmt)        "ipmi-powernv: " fmt
+
+#include <linux/ipmi_smi.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+
+#include <asm/opal.h>
+
+
+struct ipmi_smi_powernv {
+	u64			interface_id;
+	struct ipmi_device_id	ipmi_id;
+	ipmi_smi_t		intf;
+	u64			event;
+	struct notifier_block	event_nb;
+
+	/**
+	 * We assume that there can only be one outstanding request, so
+	 * keep the pending message in cur_msg. We protect this from concurrent
+	 * updates through send & recv calls (and consequently opal_msg, which
+	 * is in use when cur_msg is set) with msg_lock.
+	 */
+	spinlock_t		msg_lock;
+	struct ipmi_smi_msg	*cur_msg;
+	struct opal_ipmi_msg	*opal_msg;
+};
+
+static int ipmi_powernv_start_processing(void *send_info, ipmi_smi_t intf)
+{
+	struct ipmi_smi_powernv *smi = send_info;
+
+	smi->intf = intf;
+	return 0;
+}
+
+static void send_error_reply(struct ipmi_smi_powernv *smi,
+		struct ipmi_smi_msg *msg, u8 completion_code)
+{
+	msg->rsp[0] = msg->data[0] | 0x4;
+	msg->rsp[1] = msg->data[1];
+	msg->rsp[2] = completion_code;
+	msg->rsp_size = 3;
+	ipmi_smi_msg_received(smi->intf, msg);
+}
+
+static void ipmi_powernv_send(void *send_info, struct ipmi_smi_msg *msg)
+{
+	struct ipmi_smi_powernv *smi = send_info;
+	struct opal_ipmi_msg *opal_msg;
+	unsigned long flags;
+	int comp, rc;
+	size_t size;
+
+	/* ensure data_size will fit in the opal_ipmi_msg buffer... */
+	if (msg->data_size > IPMI_MAX_MSG_LENGTH) {
+		comp = IPMI_REQ_LEN_EXCEEDED_ERR;
+		goto err;
+	}
+
+	/* ... and that we at least have netfn and cmd bytes */
+	if (msg->data_size < 2) {
+		comp = IPMI_REQ_LEN_INVALID_ERR;
+		goto err;
+	}
+
+	spin_lock_irqsave(&smi->msg_lock, flags);
+
+	if (smi->cur_msg) {
+		comp = IPMI_NODE_BUSY_ERR;
+		goto err_unlock;
+	}
+
+	/* format our data for the OPAL API */
+	opal_msg = smi->opal_msg;
+	opal_msg->version = OPAL_IPMI_MSG_FORMAT_VERSION_1;
+	opal_msg->netfn = msg->data[0];
+	opal_msg->cmd = msg->data[1];
+	if (msg->data_size > 2)
+		memcpy(opal_msg->data, msg->data + 2, msg->data_size - 2);
+
+	/* data_size already includes the netfn and cmd bytes */
+	size = sizeof(*opal_msg) + msg->data_size - 2;
+
+	pr_devel("%s: opal_ipmi_send(0x%llx, %p, %ld)\n", __func__,
+			smi->interface_id, opal_msg, size);
+	rc = opal_ipmi_send(smi->interface_id, opal_msg, size);
+	pr_devel("%s:  -> %d\n", __func__, rc);
+
+	if (!rc) {
+		smi->cur_msg = msg;
+		spin_unlock_irqrestore(&smi->msg_lock, flags);
+		return;
+	}
+
+	comp = IPMI_ERR_UNSPECIFIED;
+err_unlock:
+	spin_unlock_irqrestore(&smi->msg_lock, flags);
+err:
+	send_error_reply(smi, msg, comp);
+}
+
+static int ipmi_powernv_recv(struct ipmi_smi_powernv *smi)
+{
+	struct opal_ipmi_msg *opal_msg;
+	struct ipmi_smi_msg *msg;
+	unsigned long flags;
+	uint64_t size;
+	int rc;
+
+	pr_devel("%s: opal_ipmi_recv(%llx, msg, sz)\n", __func__,
+			smi->interface_id);
+
+	spin_lock_irqsave(&smi->msg_lock, flags);
+
+	if (!smi->cur_msg) {
+		pr_warn("no current message?\n");
+		return 0;
+	}
+
+	msg = smi->cur_msg;
+	opal_msg = smi->opal_msg;
+
+	size = cpu_to_be64(sizeof(*opal_msg) + IPMI_MAX_MSG_LENGTH);
+
+	rc = opal_ipmi_recv(smi->interface_id,
+			opal_msg,
+			&size);
+	size = be64_to_cpu(size);
+	pr_devel("%s:   -> %d (size %lld)\n", __func__,
+			rc, rc == 0 ? size : 0);
+	if (rc) {
+		spin_unlock_irqrestore(&smi->msg_lock, flags);
+		ipmi_free_smi_msg(msg);
+		return 0;
+	}
+
+	if (size < sizeof(*opal_msg)) {
+		spin_unlock_irqrestore(&smi->msg_lock, flags);
+		pr_warn("unexpected IPMI message size %lld\n", size);
+		return 0;
+	}
+
+	if (opal_msg->version != OPAL_IPMI_MSG_FORMAT_VERSION_1) {
+		spin_unlock_irqrestore(&smi->msg_lock, flags);
+		pr_warn("unexpected IPMI message format (version %d)\n",
+				opal_msg->version);
+		return 0;
+	}
+
+	msg->rsp[0] = opal_msg->netfn;
+	msg->rsp[1] = opal_msg->cmd;
+	if (size > sizeof(*opal_msg))
+		memcpy(&msg->rsp[2], opal_msg->data, size - sizeof(*opal_msg));
+	msg->rsp_size = 2 + size - sizeof(*opal_msg);
+
+	smi->cur_msg = NULL;
+	spin_unlock_irqrestore(&smi->msg_lock, flags);
+	ipmi_smi_msg_received(smi->intf, msg);
+	return 0;
+}
+
+static void ipmi_powernv_request_events(void *send_info)
+{
+}
+
+static void ipmi_powernv_set_run_to_completion(void *send_info,
+		bool run_to_completion)
+{
+}
+
+static void ipmi_powernv_poll(void *send_info)
+{
+	struct ipmi_smi_powernv *smi = send_info;
+
+	ipmi_powernv_recv(smi);
+}
+
+static struct ipmi_smi_handlers ipmi_powernv_smi_handlers = {
+	.owner			= THIS_MODULE,
+	.start_processing	= ipmi_powernv_start_processing,
+	.sender			= ipmi_powernv_send,
+	.request_events		= ipmi_powernv_request_events,
+	.set_run_to_completion	= ipmi_powernv_set_run_to_completion,
+	.poll			= ipmi_powernv_poll,
+};
+
+static int ipmi_opal_event(struct notifier_block *nb,
+			  unsigned long events, void *change)
+{
+	struct ipmi_smi_powernv *smi = container_of(nb,
+					struct ipmi_smi_powernv, event_nb);
+
+	if (events & smi->event)
+		ipmi_powernv_recv(smi);
+	return 0;
+}
+
+static int ipmi_powernv_probe(struct platform_device *pdev)
+{
+	struct ipmi_smi_powernv *ipmi;
+	struct device *dev;
+	u32 prop;
+	int rc;
+
+	if (!pdev || !pdev->dev.of_node)
+		return -ENODEV;
+
+	dev = &pdev->dev;
+
+	ipmi = devm_kzalloc(dev, sizeof(*ipmi), GFP_KERNEL);
+	if (!ipmi)
+		return -ENOMEM;
+
+	spin_lock_init(&ipmi->msg_lock);
+
+	rc = of_property_read_u32(dev->of_node, "ibm,ipmi-interface-id",
+			&prop);
+	if (rc) {
+		dev_warn(dev, "No interface ID property\n");
+		goto err_free;
+	}
+	ipmi->interface_id = prop;
+
+	rc = of_property_read_u32(dev->of_node, "interrupts", &prop);
+	if (rc) {
+		dev_warn(dev, "No interrupts property\n");
+		goto err_free;
+	}
+
+	ipmi->event = 1ull << prop;
+	ipmi->event_nb.notifier_call = ipmi_opal_event;
+
+	rc = opal_notifier_register(&ipmi->event_nb);
+	if (rc) {
+		dev_warn(dev, "OPAL notifier registration failed (%d)\n", rc);
+		goto err_free;
+	}
+
+	ipmi->opal_msg = devm_kmalloc(dev,
+			sizeof(*ipmi->opal_msg) + IPMI_MAX_MSG_LENGTH,
+			GFP_KERNEL);
+	if (!ipmi->opal_msg) {
+		rc = -ENOMEM;
+		goto err_unregister;
+	}
+
+	/* todo: query actual ipmi_device_id */
+	rc = ipmi_register_smi(&ipmi_powernv_smi_handlers, ipmi,
+			&ipmi->ipmi_id, dev, 0);
+	if (rc) {
+		dev_warn(dev, "IPMI SMI registration failed (%d)\n", rc);
+		goto err_free_msg;
+	}
+
+	dev_set_drvdata(dev, ipmi);
+	return 0;
+
+err_free_msg:
+	devm_kfree(dev, ipmi->opal_msg);
+err_unregister:
+	opal_notifier_unregister(&ipmi->event_nb);
+err_free:
+	devm_kfree(dev, ipmi);
+	return rc;
+}
+
+static int ipmi_powernv_remove(struct platform_device *pdev)
+{
+	struct ipmi_smi_powernv *smi = dev_get_drvdata(&pdev->dev);
+
+	ipmi_unregister_smi(smi->intf);
+	opal_notifier_unregister(&smi->event_nb);
+	return 0;
+}
+
+static const struct of_device_id ipmi_powernv_match[] = {
+	{ .compatible = "ibm,opal-ipmi" },
+	{ },
+};
+
+
+static struct platform_driver powernv_ipmi_driver = {
+	.driver = {
+		.name		= "ipmi-powernv",
+		.owner		= THIS_MODULE,
+		.of_match_table	= ipmi_powernv_match,
+	},
+	.probe	= ipmi_powernv_probe,
+	.remove	= ipmi_powernv_remove,
+};
+
+
+module_platform_driver(powernv_ipmi_driver);
+
+MODULE_DEVICE_TABLE(of, ipmi_powernv_match);
+MODULE_DESCRIPTION("powernv IPMI driver");
+MODULE_AUTHOR("Jeremy Kerr <jk@ozlabs.org>");
+MODULE_LICENSE("GPL");
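The send path above repacks the generic IPMI request into OPAL's wire format: data[0] and data[1] become the opal_ipmi_msg netfn/cmd header, the remaining bytes become its payload, and the length handed to opal_ipmi_send() is sizeof(*opal_msg) + data_size - 2. A self-contained sketch of that packing step; the struct is re-declared here purely for illustration (the driver takes it from asm/opal.h) and the helper name is hypothetical:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Illustrative copy of the message layout used above. */
struct demo_opal_ipmi_msg {
	uint8_t version;
	uint8_t netfn;
	uint8_t cmd;
	uint8_t data[];		/* payload following the netfn/cmd pair */
};

/*
 * Pack an IPMI request (netfn, cmd, payload...) into the OPAL format,
 * returning the total byte count to pass to the firmware call.
 */
size_t demo_pack_opal_msg(struct demo_opal_ipmi_msg *out,
			  const uint8_t *req, size_t req_len)
{
	out->version = 1;	/* OPAL_IPMI_MSG_FORMAT_VERSION_1 */
	out->netfn = req[0];
	out->cmd = req[1];
	if (req_len > 2)
		memcpy(out->data, req + 2, req_len - 2);

	return sizeof(*out) + req_len - 2;	/* header + payload */
}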
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 5c4e1f6..90c7fdf 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -92,12 +92,9 @@
 	SI_GETTING_FLAGS,
 	SI_GETTING_EVENTS,
 	SI_CLEARING_FLAGS,
-	SI_CLEARING_FLAGS_THEN_SET_IRQ,
 	SI_GETTING_MESSAGES,
-	SI_ENABLE_INTERRUPTS1,
-	SI_ENABLE_INTERRUPTS2,
-	SI_DISABLE_INTERRUPTS1,
-	SI_DISABLE_INTERRUPTS2
+	SI_CHECKING_ENABLES,
+	SI_SETTING_ENABLES
 	/* FIXME - add watchdog stuff. */
 };
 
@@ -111,10 +108,6 @@
 };
 static char *si_to_str[] = { "kcs", "smic", "bt" };
 
-static char *ipmi_addr_src_to_str[] = { NULL, "hotmod", "hardcoded", "SPMI",
-					"ACPI", "SMBIOS", "PCI",
-					"device-tree", "default" };
-
 #define DEVICE_NAME "ipmi_si"
 
 static struct platform_driver ipmi_driver;
@@ -174,8 +167,7 @@
 	struct si_sm_handlers  *handlers;
 	enum si_type           si_type;
 	spinlock_t             si_lock;
-	struct list_head       xmit_msgs;
-	struct list_head       hp_xmit_msgs;
+	struct ipmi_smi_msg    *waiting_msg;
 	struct ipmi_smi_msg    *curr_msg;
 	enum si_intf_state     si_state;
 
@@ -254,9 +246,6 @@
 	/* The time (in jiffies) the last timeout occurred at. */
 	unsigned long       last_timeout_jiffies;
 
-	/* Used to gracefully stop the timer without race conditions. */
-	atomic_t            stop_operation;
-
 	/* Are we waiting for the events, pretimeouts, received msgs? */
 	atomic_t            need_watch;
 
@@ -268,6 +257,16 @@
 	 */
 	bool interrupt_disabled;
 
+	/*
+	 * Does the BMC support events?
+	 */
+	bool supports_event_msg_buff;
+
+	/*
+	 * Did we get an attention that we did not handle?
+	 */
+	bool got_attn;
+
 	/* From the get device id response... */
 	struct ipmi_device_id device_id;
 
@@ -332,7 +331,10 @@
 			     struct ipmi_smi_msg *msg)
 {
 	/* Deliver the message to the upper layer. */
-	ipmi_smi_msg_received(smi_info->intf, msg);
+	if (smi_info->intf)
+		ipmi_smi_msg_received(smi_info->intf, msg);
+	else
+		ipmi_free_smi_msg(msg);
 }
 
 static void return_hosed_msg(struct smi_info *smi_info, int cCode)
@@ -356,28 +358,18 @@
 static enum si_sm_result start_next_msg(struct smi_info *smi_info)
 {
 	int              rv;
-	struct list_head *entry = NULL;
 #ifdef DEBUG_TIMING
 	struct timeval t;
 #endif
 
-	/* Pick the high priority queue first. */
-	if (!list_empty(&(smi_info->hp_xmit_msgs))) {
-		entry = smi_info->hp_xmit_msgs.next;
-	} else if (!list_empty(&(smi_info->xmit_msgs))) {
-		entry = smi_info->xmit_msgs.next;
-	}
-
-	if (!entry) {
+	if (!smi_info->waiting_msg) {
 		smi_info->curr_msg = NULL;
 		rv = SI_SM_IDLE;
 	} else {
 		int err;
 
-		list_del(entry);
-		smi_info->curr_msg = list_entry(entry,
-						struct ipmi_smi_msg,
-						link);
+		smi_info->curr_msg = smi_info->waiting_msg;
+		smi_info->waiting_msg = NULL;
 #ifdef DEBUG_TIMING
 		do_gettimeofday(&t);
 		printk(KERN_DEBUG "**Start2: %d.%9.9d\n", t.tv_sec, t.tv_usec);
@@ -401,22 +393,7 @@
 	return rv;
 }
 
-static void start_enable_irq(struct smi_info *smi_info)
-{
-	unsigned char msg[2];
-
-	/*
-	 * If we are enabling interrupts, we have to tell the
-	 * BMC to use them.
-	 */
-	msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
-	msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
-
-	smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
-	smi_info->si_state = SI_ENABLE_INTERRUPTS1;
-}
-
-static void start_disable_irq(struct smi_info *smi_info)
+static void start_check_enables(struct smi_info *smi_info)
 {
 	unsigned char msg[2];
 
@@ -424,7 +401,7 @@
 	msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
 
 	smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
-	smi_info->si_state = SI_DISABLE_INTERRUPTS1;
+	smi_info->si_state = SI_CHECKING_ENABLES;
 }
 
 static void start_clear_flags(struct smi_info *smi_info)
@@ -440,6 +417,32 @@
 	smi_info->si_state = SI_CLEARING_FLAGS;
 }
 
+static void start_getting_msg_queue(struct smi_info *smi_info)
+{
+	smi_info->curr_msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2);
+	smi_info->curr_msg->data[1] = IPMI_GET_MSG_CMD;
+	smi_info->curr_msg->data_size = 2;
+
+	smi_info->handlers->start_transaction(
+		smi_info->si_sm,
+		smi_info->curr_msg->data,
+		smi_info->curr_msg->data_size);
+	smi_info->si_state = SI_GETTING_MESSAGES;
+}
+
+static void start_getting_events(struct smi_info *smi_info)
+{
+	smi_info->curr_msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2);
+	smi_info->curr_msg->data[1] = IPMI_READ_EVENT_MSG_BUFFER_CMD;
+	smi_info->curr_msg->data_size = 2;
+
+	smi_info->handlers->start_transaction(
+		smi_info->si_sm,
+		smi_info->curr_msg->data,
+		smi_info->curr_msg->data_size);
+	smi_info->si_state = SI_GETTING_EVENTS;
+}
+
 static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
 {
 	smi_info->last_timeout_jiffies = jiffies;
@@ -453,22 +456,45 @@
  * polled until we can allocate some memory.  Once we have some
  * memory, we will re-enable the interrupt.
  */
-static inline void disable_si_irq(struct smi_info *smi_info)
+static inline bool disable_si_irq(struct smi_info *smi_info)
 {
 	if ((smi_info->irq) && (!smi_info->interrupt_disabled)) {
-		start_disable_irq(smi_info);
 		smi_info->interrupt_disabled = true;
-		if (!atomic_read(&smi_info->stop_operation))
-			smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_JIFFIES);
+		start_check_enables(smi_info);
+		return true;
 	}
+	return false;
 }
 
-static inline void enable_si_irq(struct smi_info *smi_info)
+static inline bool enable_si_irq(struct smi_info *smi_info)
 {
 	if ((smi_info->irq) && (smi_info->interrupt_disabled)) {
-		start_enable_irq(smi_info);
 		smi_info->interrupt_disabled = false;
+		start_check_enables(smi_info);
+		return true;
 	}
+	return false;
+}
+
+/*
+ * Allocate a message.  If unable to allocate, start the interrupt
+ * disable process and return NULL.  If able to allocate but
+ * interrupts are disabled, free the message and return NULL after
+ * starting the interrupt enable process.
+ */
+static struct ipmi_smi_msg *alloc_msg_handle_irq(struct smi_info *smi_info)
+{
+	struct ipmi_smi_msg *msg;
+
+	msg = ipmi_alloc_smi_msg();
+	if (!msg) {
+		if (!disable_si_irq(smi_info))
+			smi_info->si_state = SI_NORMAL;
+	} else if (enable_si_irq(smi_info)) {
+		ipmi_free_smi_msg(msg);
+		msg = NULL;
+	}
+	return msg;
 }
 
 static void handle_flags(struct smi_info *smi_info)
@@ -480,45 +506,22 @@
 
 		start_clear_flags(smi_info);
 		smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
-		ipmi_smi_watchdog_pretimeout(smi_info->intf);
+		if (smi_info->intf)
+			ipmi_smi_watchdog_pretimeout(smi_info->intf);
 	} else if (smi_info->msg_flags & RECEIVE_MSG_AVAIL) {
 		/* Messages available. */
-		smi_info->curr_msg = ipmi_alloc_smi_msg();
-		if (!smi_info->curr_msg) {
-			disable_si_irq(smi_info);
-			smi_info->si_state = SI_NORMAL;
+		smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
+		if (!smi_info->curr_msg)
 			return;
-		}
-		enable_si_irq(smi_info);
 
-		smi_info->curr_msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2);
-		smi_info->curr_msg->data[1] = IPMI_GET_MSG_CMD;
-		smi_info->curr_msg->data_size = 2;
-
-		smi_info->handlers->start_transaction(
-			smi_info->si_sm,
-			smi_info->curr_msg->data,
-			smi_info->curr_msg->data_size);
-		smi_info->si_state = SI_GETTING_MESSAGES;
+		start_getting_msg_queue(smi_info);
 	} else if (smi_info->msg_flags & EVENT_MSG_BUFFER_FULL) {
 		/* Events available. */
-		smi_info->curr_msg = ipmi_alloc_smi_msg();
-		if (!smi_info->curr_msg) {
-			disable_si_irq(smi_info);
-			smi_info->si_state = SI_NORMAL;
+		smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
+		if (!smi_info->curr_msg)
 			return;
-		}
-		enable_si_irq(smi_info);
 
-		smi_info->curr_msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2);
-		smi_info->curr_msg->data[1] = IPMI_READ_EVENT_MSG_BUFFER_CMD;
-		smi_info->curr_msg->data_size = 2;
-
-		smi_info->handlers->start_transaction(
-			smi_info->si_sm,
-			smi_info->curr_msg->data,
-			smi_info->curr_msg->data_size);
-		smi_info->si_state = SI_GETTING_EVENTS;
+		start_getting_events(smi_info);
 	} else if (smi_info->msg_flags & OEM_DATA_AVAIL &&
 		   smi_info->oem_data_avail_handler) {
 		if (smi_info->oem_data_avail_handler(smi_info))
@@ -527,6 +530,55 @@
 		smi_info->si_state = SI_NORMAL;
 }
 
+/*
+ * Global enables we care about.
+ */
+#define GLOBAL_ENABLES_MASK (IPMI_BMC_EVT_MSG_BUFF | IPMI_BMC_RCV_MSG_INTR | \
+			     IPMI_BMC_EVT_MSG_INTR)
+
+static u8 current_global_enables(struct smi_info *smi_info, u8 base,
+				 bool *irq_on)
+{
+	u8 enables = 0;
+
+	if (smi_info->supports_event_msg_buff)
+		enables |= IPMI_BMC_EVT_MSG_BUFF;
+	else
+		enables &= ~IPMI_BMC_EVT_MSG_BUFF;
+
+	if (smi_info->irq && !smi_info->interrupt_disabled)
+		enables |= IPMI_BMC_RCV_MSG_INTR;
+	else
+		enables &= ~IPMI_BMC_RCV_MSG_INTR;
+
+	if (smi_info->supports_event_msg_buff &&
+	    smi_info->irq && !smi_info->interrupt_disabled)
+		enables |= IPMI_BMC_EVT_MSG_INTR;
+	else
+		enables &= ~IPMI_BMC_EVT_MSG_INTR;
+
+	*irq_on = enables & (IPMI_BMC_EVT_MSG_INTR | IPMI_BMC_RCV_MSG_INTR);
+
+	return enables;
+}
+
+static void check_bt_irq(struct smi_info *smi_info, bool irq_on)
+{
+	u8 irqstate = smi_info->io.inputb(&smi_info->io, IPMI_BT_INTMASK_REG);
+
+	irqstate &= IPMI_BT_INTMASK_ENABLE_IRQ_BIT;
+
+	if ((bool)irqstate == irq_on)
+		return;
+
+	if (irq_on)
+		smi_info->io.outputb(&smi_info->io, IPMI_BT_INTMASK_REG,
+				     IPMI_BT_INTMASK_ENABLE_IRQ_BIT);
+	else
+		smi_info->io.outputb(&smi_info->io, IPMI_BT_INTMASK_REG, 0);
+}
+
 static void handle_transaction_done(struct smi_info *smi_info)
 {
 	struct ipmi_smi_msg *msg;
@@ -581,7 +633,6 @@
 	}
 
 	case SI_CLEARING_FLAGS:
-	case SI_CLEARING_FLAGS_THEN_SET_IRQ:
 	{
 		unsigned char msg[3];
 
@@ -592,10 +643,7 @@
 			dev_warn(smi_info->dev,
 				 "Error clearing flags: %2.2x\n", msg[2]);
 		}
-		if (smi_info->si_state == SI_CLEARING_FLAGS_THEN_SET_IRQ)
-			start_enable_irq(smi_info);
-		else
-			smi_info->si_state = SI_NORMAL;
+		smi_info->si_state = SI_NORMAL;
 		break;
 	}
 
@@ -675,9 +723,11 @@
 		break;
 	}
 
-	case SI_ENABLE_INTERRUPTS1:
+	case SI_CHECKING_ENABLES:
 	{
 		unsigned char msg[4];
+		u8 enables;
+		bool irq_on;
 
 		/* We got the flags from the SMI, now handle them. */
 		smi_info->handlers->get_result(smi_info->si_sm, msg, 4);
@@ -687,70 +737,53 @@
 			dev_warn(smi_info->dev,
 				 "Maybe ok, but ipmi might run very slowly.\n");
 			smi_info->si_state = SI_NORMAL;
-		} else {
+			break;
+		}
+		enables = current_global_enables(smi_info, 0, &irq_on);
+		if (smi_info->si_type == SI_BT)
+			/* BT has its own interrupt enable bit. */
+			check_bt_irq(smi_info, irq_on);
+		if (enables != (msg[3] & GLOBAL_ENABLES_MASK)) {
+			/* Enables are not correct, fix them. */
 			msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
 			msg[1] = IPMI_SET_BMC_GLOBAL_ENABLES_CMD;
-			msg[2] = (msg[3] |
-				  IPMI_BMC_RCV_MSG_INTR |
-				  IPMI_BMC_EVT_MSG_INTR);
+			msg[2] = enables | (msg[3] & ~GLOBAL_ENABLES_MASK);
 			smi_info->handlers->start_transaction(
 				smi_info->si_sm, msg, 3);
-			smi_info->si_state = SI_ENABLE_INTERRUPTS2;
-		}
-		break;
-	}
-
-	case SI_ENABLE_INTERRUPTS2:
-	{
-		unsigned char msg[4];
-
-		/* We got the flags from the SMI, now handle them. */
-		smi_info->handlers->get_result(smi_info->si_sm, msg, 4);
-		if (msg[2] != 0) {
-			dev_warn(smi_info->dev,
-				 "Couldn't set irq info: %x.\n", msg[2]);
-			dev_warn(smi_info->dev,
-				 "Maybe ok, but ipmi might run very slowly.\n");
-		} else
-			smi_info->interrupt_disabled = false;
-		smi_info->si_state = SI_NORMAL;
-		break;
-	}
-
-	case SI_DISABLE_INTERRUPTS1:
-	{
-		unsigned char msg[4];
-
-		/* We got the flags from the SMI, now handle them. */
-		smi_info->handlers->get_result(smi_info->si_sm, msg, 4);
-		if (msg[2] != 0) {
-			dev_warn(smi_info->dev, "Could not disable interrupts"
-				 ", failed get.\n");
+			smi_info->si_state = SI_SETTING_ENABLES;
+		} else if (smi_info->supports_event_msg_buff) {
+			smi_info->curr_msg = ipmi_alloc_smi_msg();
+			if (!smi_info->curr_msg) {
+				smi_info->si_state = SI_NORMAL;
+				break;
+			}
+			start_getting_msg_queue(smi_info);
+		} else {
 			smi_info->si_state = SI_NORMAL;
-		} else {
-			msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
-			msg[1] = IPMI_SET_BMC_GLOBAL_ENABLES_CMD;
-			msg[2] = (msg[3] &
-				  ~(IPMI_BMC_RCV_MSG_INTR |
-				    IPMI_BMC_EVT_MSG_INTR));
-			smi_info->handlers->start_transaction(
-				smi_info->si_sm, msg, 3);
-			smi_info->si_state = SI_DISABLE_INTERRUPTS2;
 		}
 		break;
 	}
 
-	case SI_DISABLE_INTERRUPTS2:
+	case SI_SETTING_ENABLES:
 	{
 		unsigned char msg[4];
 
-		/* We got the flags from the SMI, now handle them. */
 		smi_info->handlers->get_result(smi_info->si_sm, msg, 4);
-		if (msg[2] != 0) {
-			dev_warn(smi_info->dev, "Could not disable interrupts"
-				 ", failed set.\n");
+		if (msg[2] != 0)
+			dev_warn(smi_info->dev,
+				 "Could not set the global enables: 0x%x.\n",
+				 msg[2]);
+
+		if (smi_info->supports_event_msg_buff) {
+			smi_info->curr_msg = ipmi_alloc_smi_msg();
+			if (!smi_info->curr_msg) {
+				smi_info->si_state = SI_NORMAL;
+				break;
+			}
+			start_getting_msg_queue(smi_info);
+		} else {
+			smi_info->si_state = SI_NORMAL;
 		}
-		smi_info->si_state = SI_NORMAL;
 		break;
 	}
 	}
@@ -808,25 +841,35 @@
 	 * We prefer handling attn over new messages.  But don't do
 	 * this if there is not yet an upper layer to handle anything.
 	 */
-	if (likely(smi_info->intf) && si_sm_result == SI_SM_ATTN) {
+	if (likely(smi_info->intf) &&
+	    (si_sm_result == SI_SM_ATTN || smi_info->got_attn)) {
 		unsigned char msg[2];
 
-		smi_inc_stat(smi_info, attentions);
+		if (smi_info->si_state != SI_NORMAL) {
+			/*
+			 * We got an ATTN, but we are doing something else.
+			 * Handle the ATTN later.
+			 */
+			smi_info->got_attn = true;
+		} else {
+			smi_info->got_attn = false;
+			smi_inc_stat(smi_info, attentions);
 
-		/*
-		 * Got a attn, send down a get message flags to see
-		 * what's causing it.  It would be better to handle
-		 * this in the upper layer, but due to the way
-		 * interrupts work with the SMI, that's not really
-		 * possible.
-		 */
-		msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
-		msg[1] = IPMI_GET_MSG_FLAGS_CMD;
+			/*
+			 * Got an attn, send down a get message flags to see
+			 * what's causing it.  It would be better to handle
+			 * this in the upper layer, but due to the way
+			 * interrupts work with the SMI, that's not really
+			 * possible.
+			 */
+			msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
+			msg[1] = IPMI_GET_MSG_FLAGS_CMD;
 
-		smi_info->handlers->start_transaction(
-			smi_info->si_sm, msg, 2);
-		smi_info->si_state = SI_GETTING_FLAGS;
-		goto restart;
+			smi_info->handlers->start_transaction(
+				smi_info->si_sm, msg, 2);
+			smi_info->si_state = SI_GETTING_FLAGS;
+			goto restart;
+		}
 	}
 
 	/* If we are currently idle, try to start the next message. */
@@ -846,19 +889,21 @@
 		 */
 		atomic_set(&smi_info->req_events, 0);
 
-		smi_info->curr_msg = ipmi_alloc_smi_msg();
-		if (!smi_info->curr_msg)
-			goto out;
+		/*
+		 * Take this opportunity to check the interrupt and
+		 * message enable state for the BMC.  The BMC can be
+		 * asynchronously reset, and may thus get interrupts
+		 * disabled and messages disabled.
+		 */
+		if (smi_info->supports_event_msg_buff || smi_info->irq) {
+			start_check_enables(smi_info);
+		} else {
+			smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
+			if (!smi_info->curr_msg)
+				goto out;
 
-		smi_info->curr_msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2);
-		smi_info->curr_msg->data[1] = IPMI_READ_EVENT_MSG_BUFFER_CMD;
-		smi_info->curr_msg->data_size = 2;
-
-		smi_info->handlers->start_transaction(
-			smi_info->si_sm,
-			smi_info->curr_msg->data,
-			smi_info->curr_msg->data_size);
-		smi_info->si_state = SI_GETTING_EVENTS;
+			start_getting_events(smi_info);
+		}
 		goto restart;
 	}
  out:
@@ -879,8 +924,7 @@
 }
 
 static void sender(void                *send_info,
-		   struct ipmi_smi_msg *msg,
-		   int                 priority)
+		   struct ipmi_smi_msg *msg)
 {
 	struct smi_info   *smi_info = send_info;
 	enum si_sm_result result;
@@ -889,14 +933,8 @@
 	struct timeval    t;
 #endif
 
-	if (atomic_read(&smi_info->stop_operation)) {
-		msg->rsp[0] = msg->data[0] | 4;
-		msg->rsp[1] = msg->data[1];
-		msg->rsp[2] = IPMI_ERR_UNSPECIFIED;
-		msg->rsp_size = 3;
-		deliver_recv_msg(smi_info, msg);
-		return;
-	}
+	BUG_ON(smi_info->waiting_msg);
+	smi_info->waiting_msg = msg;
 
 #ifdef DEBUG_TIMING
 	do_gettimeofday(&t);
@@ -905,16 +943,16 @@
 
 	if (smi_info->run_to_completion) {
 		/*
-		 * If we are running to completion, then throw it in
-		 * the list and run transactions until everything is
-		 * clear.  Priority doesn't matter here.
+		 * If we are running to completion, start it and run
+		 * transactions until everything is clear.
 		 */
+		smi_info->curr_msg = smi_info->waiting_msg;
+		smi_info->waiting_msg = NULL;
 
 		/*
 		 * Run to completion means we are single-threaded, no
 		 * need for locks.
 		 */
-		list_add_tail(&(msg->link), &(smi_info->xmit_msgs));
 
 		result = smi_event_handler(smi_info, 0);
 		while (result != SI_SM_IDLE) {
@@ -926,11 +964,6 @@
 	}
 
 	spin_lock_irqsave(&smi_info->si_lock, flags);
-	if (priority > 0)
-		list_add_tail(&msg->link, &smi_info->hp_xmit_msgs);
-	else
-		list_add_tail(&msg->link, &smi_info->xmit_msgs);
-
 	check_start_timer_thread(smi_info);
 	spin_unlock_irqrestore(&smi_info->si_lock, flags);
 }
@@ -1068,8 +1101,7 @@
 {
 	struct smi_info *smi_info = send_info;
 
-	if (atomic_read(&smi_info->stop_operation) ||
-				!smi_info->has_event_buffer)
+	if (!smi_info->has_event_buffer)
 		return;
 
 	atomic_set(&smi_info->req_events, 1);
@@ -1697,7 +1729,7 @@
 	}
 	*s = '\0';
 	s++;
-	for (i = 0; hotmod_ops[i].name; i++) {
+	for (i = 0; v[i].name; i++) {
 		if (strcmp(*curr, v[i].name) == 0) {
 			*val = v[i].val;
 			*curr = s;
@@ -2133,6 +2165,9 @@
 	case 3:	/* BT */
 		info->si_type = SI_BT;
 		break;
+	case 4: /* SSIF, just ignore */
+		kfree(info);
+		return -EIO;
 	default:
 		printk(KERN_INFO PFX "Unknown ACPI/SPMI SI type %d\n",
 		       spmi->InterfaceType);
@@ -2250,6 +2285,8 @@
 	case 3:
 		info->si_type = SI_BT;
 		break;
+	case 4: /* SSIF, just ignore */
+		goto err_free;
 	default:
 		dev_info(&dev->dev, "unknown IPMI type %lld\n", tmp);
 		goto err_free;
@@ -2913,9 +2950,11 @@
 		goto out;
 	}
 
-	if (resp[3] & IPMI_BMC_EVT_MSG_BUFF)
+	if (resp[3] & IPMI_BMC_EVT_MSG_BUFF) {
 		/* buffer is already enabled, nothing to do. */
+		smi_info->supports_event_msg_buff = true;
 		goto out;
+	}
 
 	msg[0] = IPMI_NETFN_APP_REQUEST << 2;
 	msg[1] = IPMI_SET_BMC_GLOBAL_ENABLES_CMD;
@@ -2948,6 +2987,9 @@
 		 * that the event buffer is not supported.
 		 */
 		rv = -ENOENT;
+	else
+		smi_info->supports_event_msg_buff = true;
+
  out:
 	kfree(resp);
 	return rv;
@@ -3188,15 +3230,10 @@
 
 static inline void wait_for_timer_and_thread(struct smi_info *smi_info)
 {
-	if (smi_info->intf) {
-		/*
-		 * The timer and thread are only running if the
-		 * interface has been started up and registered.
-		 */
-		if (smi_info->thread != NULL)
-			kthread_stop(smi_info->thread);
+	if (smi_info->thread != NULL)
+		kthread_stop(smi_info->thread);
+	if (smi_info->timer_running)
 		del_timer_sync(&smi_info->si_timer);
-	}
 }
 
 static struct ipmi_default_vals
@@ -3274,8 +3311,8 @@
 	int rv = 0;
 
 	printk(KERN_INFO PFX "Adding %s-specified %s state machine",
-			ipmi_addr_src_to_str[new_smi->addr_source],
-			si_to_str[new_smi->si_type]);
+	       ipmi_addr_src_to_str(new_smi->addr_source),
+	       si_to_str[new_smi->si_type]);
 	mutex_lock(&smi_infos_lock);
 	if (!is_new_interface(new_smi)) {
 		printk(KERN_CONT " duplicate interface\n");
@@ -3305,7 +3342,7 @@
 	printk(KERN_INFO PFX "Trying %s-specified %s state"
 	       " machine at %s address 0x%lx, slave address 0x%x,"
 	       " irq %d\n",
-	       ipmi_addr_src_to_str[new_smi->addr_source],
+	       ipmi_addr_src_to_str(new_smi->addr_source),
 	       si_to_str[new_smi->si_type],
 	       addr_space_to_str[new_smi->io.addr_type],
 	       new_smi->io.addr_data,
@@ -3371,8 +3408,7 @@
 	setup_oem_data_handler(new_smi);
 	setup_xaction_handlers(new_smi);
 
-	INIT_LIST_HEAD(&(new_smi->xmit_msgs));
-	INIT_LIST_HEAD(&(new_smi->hp_xmit_msgs));
+	new_smi->waiting_msg = NULL;
 	new_smi->curr_msg = NULL;
 	atomic_set(&new_smi->req_events, 0);
 	new_smi->run_to_completion = false;
@@ -3380,7 +3416,6 @@
 		atomic_set(&new_smi->stats[i], 0);
 
 	new_smi->interrupt_disabled = true;
-	atomic_set(&new_smi->stop_operation, 0);
 	atomic_set(&new_smi->need_watch, 0);
 	new_smi->intf_num = smi_num;
 	smi_num++;
@@ -3394,9 +3429,15 @@
 	 * timer to avoid racing with the timer.
 	 */
 	start_clear_flags(new_smi);
-	/* IRQ is defined to be set when non-zero. */
-	if (new_smi->irq)
-		new_smi->si_state = SI_CLEARING_FLAGS_THEN_SET_IRQ;
+
+	/*
+	 * IRQ is defined to be set when non-zero.  req_events will
+	 * cause a global flags check that will enable interrupts.
+	 */
+	if (new_smi->irq) {
+		new_smi->interrupt_disabled = false;
+		atomic_set(&new_smi->req_events, 1);
+	}
 
 	if (!new_smi->dev) {
 		/*
@@ -3428,7 +3469,6 @@
 			       new_smi,
 			       &new_smi->device_id,
 			       new_smi->dev,
-			       "bmc",
 			       new_smi->slave_addr);
 	if (rv) {
 		dev_err(new_smi->dev, "Unable to register device: error %d\n",
@@ -3466,15 +3506,15 @@
 	return 0;
 
  out_err_stop_timer:
-	atomic_inc(&new_smi->stop_operation);
 	wait_for_timer_and_thread(new_smi);
 
  out_err:
 	new_smi->interrupt_disabled = true;
 
 	if (new_smi->intf) {
-		ipmi_unregister_smi(new_smi->intf);
+		ipmi_smi_t intf = new_smi->intf;
 		new_smi->intf = NULL;
+		ipmi_unregister_smi(intf);
 	}
 
 	if (new_smi->irq_cleanup) {
@@ -3653,60 +3693,49 @@
 static void cleanup_one_si(struct smi_info *to_clean)
 {
 	int           rv = 0;
-	unsigned long flags;
 
 	if (!to_clean)
 		return;
 
+	if (to_clean->intf) {
+		ipmi_smi_t intf = to_clean->intf;
+
+		to_clean->intf = NULL;
+		rv = ipmi_unregister_smi(intf);
+		if (rv) {
+			pr_err(PFX "Unable to unregister device: errno=%d\n",
+			       rv);
+		}
+	}
+
 	if (to_clean->dev)
 		dev_set_drvdata(to_clean->dev, NULL);
 
 	list_del(&to_clean->link);
 
-	/* Tell the driver that we are shutting down. */
-	atomic_inc(&to_clean->stop_operation);
-
 	/*
-	 * Make sure the timer and thread are stopped and will not run
-	 * again.
+	 * Make sure that interrupts, the timer and the thread are
+	 * stopped and will not run again.
 	 */
+	if (to_clean->irq_cleanup)
+		to_clean->irq_cleanup(to_clean);
 	wait_for_timer_and_thread(to_clean);
 
 	/*
 	 * Timeouts are stopped, now make sure the interrupts are off
-	 * for the device.  A little tricky with locks to make sure
-	 * there are no races.
+	 * in the BMC.  Note that timers and CPU interrupts are off,
+	 * so no need for locks.
 	 */
-	spin_lock_irqsave(&to_clean->si_lock, flags);
 	while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
-		spin_unlock_irqrestore(&to_clean->si_lock, flags);
 		poll(to_clean);
 		schedule_timeout_uninterruptible(1);
-		spin_lock_irqsave(&to_clean->si_lock, flags);
 	}
 	disable_si_irq(to_clean);
-	spin_unlock_irqrestore(&to_clean->si_lock, flags);
 	while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
 		poll(to_clean);
 		schedule_timeout_uninterruptible(1);
 	}
 
-	/* Clean up interrupts and make sure that everything is done. */
-	if (to_clean->irq_cleanup)
-		to_clean->irq_cleanup(to_clean);
-	while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
-		poll(to_clean);
-		schedule_timeout_uninterruptible(1);
-	}
-
-	if (to_clean->intf)
-		rv = ipmi_unregister_smi(to_clean->intf);
-
-	if (rv) {
-		printk(KERN_ERR PFX "Unable to unregister device: errno=%d\n",
-		       rv);
-	}
-
 	if (to_clean->handlers)
 		to_clean->handlers->cleanup(to_clean->si_sm);
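The SI_CHECKING_ENABLES handling added above boils down to: compute the enables this interface wants from its interrupt and event-buffer state, mask both sides with GLOBAL_ENABLES_MASK, and only issue a set-global-enables transaction when they differ. A compact standalone sketch of that comparison; the function name is hypothetical and the bit values mirror the IPMI global-enables layout used by ipmi_msgdefs.h:

#include <stdbool.h>
#include <stdint.h>

#define IPMI_BMC_RCV_MSG_INTR	0x01	/* receive message queue interrupt */
#define IPMI_BMC_EVT_MSG_INTR	0x02	/* event buffer full interrupt */
#define IPMI_BMC_EVT_MSG_BUFF	0x04	/* event message buffer */

#define GLOBAL_ENABLES_MASK (IPMI_BMC_EVT_MSG_BUFF | IPMI_BMC_RCV_MSG_INTR | \
			     IPMI_BMC_EVT_MSG_INTR)

/*
 * Return true when the enables read back from the BMC already match
 * what the interface wants, so no set-enables command is needed.
 */
bool demo_enables_ok(uint8_t bmc_enables, bool irq_usable,
		     bool supports_event_msg_buff)
{
	uint8_t want = 0;

	if (supports_event_msg_buff)
		want |= IPMI_BMC_EVT_MSG_BUFF;
	if (irq_usable)
		want |= IPMI_BMC_RCV_MSG_INTR;
	if (supports_event_msg_buff && irq_usable)
		want |= IPMI_BMC_EVT_MSG_INTR;

	return want == (bmc_enables & GLOBAL_ENABLES_MASK);
}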
 
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
new file mode 100644
index 0000000..e178ac2
--- /dev/null
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -0,0 +1,1870 @@
+/*
+ * ipmi_ssif.c
+ *
+ * The interface to the IPMI driver for SMBus access to an
+ * SMBus-compliant device.  Called SSIF by the IPMI spec.
+ *
+ * Author: Intel Corporation
+ *         Todd Davis <todd.c.davis@intel.com>
+ *
+ * Rewritten by Corey Minyard <minyard@acm.org> to support the
+ * non-blocking I2C interface, add support for multi-part
+ * transactions, add PEC support, and general cleanup.
+ *
+ * Copyright 2003 Intel Corporation
+ * Copyright 2005 MontaVista Software
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ */
+
+/*
+ * This file holds the "policy" for the interface to the SSIF state
+ * machine.  It does the configuration, handles timers and interrupts,
+ * and drives the real SSIF state machine.
+ */
+
+/*
+ * TODO: Figure out how to use SMB alerts.  This will require a new
+ * interface into the I2C driver, I believe.
+ */
+
+#include <linux/version.h>
+#if defined(MODVERSIONS)
+#include <linux/modversions.h>
+#endif
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/timer.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/i2c.h>
+#include <linux/ipmi_smi.h>
+#include <linux/init.h>
+#include <linux/dmi.h>
+#include <linux/kthread.h>
+#include <linux/acpi.h>
+
+#define PFX "ipmi_ssif: "
+#define DEVICE_NAME "ipmi_ssif"
+
+#define IPMI_GET_SYSTEM_INTERFACE_CAPABILITIES_CMD	0x57
+
+#define	SSIF_IPMI_REQUEST			2
+#define	SSIF_IPMI_MULTI_PART_REQUEST_START	6
+#define	SSIF_IPMI_MULTI_PART_REQUEST_MIDDLE	7
+#define	SSIF_IPMI_RESPONSE			3
+#define	SSIF_IPMI_MULTI_PART_RESPONSE_MIDDLE	9
+
+/* ssif_debug is a bit-field
+ *	SSIF_DEBUG_MSG -	commands and their responses
+ *	SSIF_DEBUG_STATE -	message states
+ *	SSIF_DEBUG_TIMING -	measure times between events in the driver
+ */
+#define SSIF_DEBUG_TIMING	4
+#define SSIF_DEBUG_STATE	2
+#define SSIF_DEBUG_MSG		1
+#define SSIF_NODEBUG		0
+#define SSIF_DEFAULT_DEBUG	(SSIF_NODEBUG)
+
+/*
+ * Timer values
+ */
+#define SSIF_MSG_USEC		20000	/* 20ms between message tries. */
+#define SSIF_MSG_PART_USEC	5000	/* 5ms for a message part */
+
+/* How many times do we retry sending/receiving the message. */
+#define	SSIF_SEND_RETRIES	5
+#define	SSIF_RECV_RETRIES	250
+
+#define SSIF_MSG_MSEC		(SSIF_MSG_USEC / 1000)
+#define SSIF_MSG_JIFFIES	((SSIF_MSG_USEC * 1000) / TICK_NSEC)
+#define SSIF_MSG_PART_JIFFIES	((SSIF_MSG_PART_USEC * 1000) / TICK_NSEC)
+
+enum ssif_intf_state {
+	SSIF_NORMAL,
+	SSIF_GETTING_FLAGS,
+	SSIF_GETTING_EVENTS,
+	SSIF_CLEARING_FLAGS,
+	SSIF_GETTING_MESSAGES,
+	/* FIXME - add watchdog stuff. */
+};
+
+#define SSIF_IDLE(ssif)	 ((ssif)->ssif_state == SSIF_NORMAL \
+			  && (ssif)->curr_msg == NULL)
+
+/*
+ * Indexes into stats[] in ssif_info below.
+ */
+enum ssif_stat_indexes {
+	/* Number of total messages sent. */
+	SSIF_STAT_sent_messages = 0,
+
+	/*
+	 * Number of message parts sent.  Messages may be broken into
+	 * parts if they are long.
+	 */
+	SSIF_STAT_sent_messages_parts,
+
+	/*
+	 * Number of times a message was retried.
+	 */
+	SSIF_STAT_send_retries,
+
+	/*
+	 * Number of times the send of a message failed.
+	 */
+	SSIF_STAT_send_errors,
+
+	/*
+	 * Number of message responses received.
+	 */
+	SSIF_STAT_received_messages,
+
+	/*
+	 * Number of message fragments received.
+	 */
+	SSIF_STAT_received_message_parts,
+
+	/*
+	 * Number of times the receive of a message was retried.
+	 */
+	SSIF_STAT_receive_retries,
+
+	/*
+	 * Number of errors receiving messages.
+	 */
+	SSIF_STAT_receive_errors,
+
+	/*
+	 * Number of times a flag fetch was requested.
+	 */
+	SSIF_STAT_flag_fetches,
+
+	/*
+	 * Number of times the hardware didn't follow the state machine.
+	 */
+	SSIF_STAT_hosed,
+
+	/*
+	 * Number of received events.
+	 */
+	SSIF_STAT_events,
+
+	/* Number of asynchronous messages received. */
+	SSIF_STAT_incoming_messages,
+
+	/* Number of watchdog pretimeouts. */
+	SSIF_STAT_watchdog_pretimeouts,
+
+	/* Always add statistics before this value, it must be last. */
+	SSIF_NUM_STATS
+};
+
+struct ssif_addr_info {
+	unsigned short addr;
+	struct i2c_board_info binfo;
+	char *adapter_name;
+	int debug;
+	int slave_addr;
+	enum ipmi_addr_src addr_src;
+	union ipmi_smi_info_union addr_info;
+
+	struct mutex clients_mutex;
+	struct list_head clients;
+
+	struct list_head link;
+};
+
+struct ssif_info;
+
+typedef void (*ssif_i2c_done)(struct ssif_info *ssif_info, int result,
+			     unsigned char *data, unsigned int len);
+
+struct ssif_info {
+	ipmi_smi_t          intf;
+	int                 intf_num;
+	spinlock_t	    lock;
+	struct ipmi_smi_msg *waiting_msg;
+	struct ipmi_smi_msg *curr_msg;
+	enum ssif_intf_state ssif_state;
+	unsigned long       ssif_debug;
+
+	struct ipmi_smi_handlers handlers;
+
+	enum ipmi_addr_src addr_source; /* ACPI, PCI, SMBIOS, hardcode, etc. */
+	union ipmi_smi_info_union addr_info;
+
+	/*
+	 * Flags from the last GET_MSG_FLAGS command, used when an ATTN
+	 * is set to hold the flags until we are done handling everything
+	 * from the flags.
+	 */
+#define RECEIVE_MSG_AVAIL	0x01
+#define EVENT_MSG_BUFFER_FULL	0x02
+#define WDT_PRE_TIMEOUT_INT	0x08
+	unsigned char       msg_flags;
+
+	bool		    has_event_buffer;
+
+	/*
+	 * If set to true, this will request events the next time the
+	 * state machine is idle.
+	 */
+	bool                req_events;
+
+	/*
+	 * If set to true, this will request flags the next time the
+	 * state machine is idle.
+	 */
+	bool                req_flags;
+
+	/*
+	 * Used to perform timer operations when run-to-completion
+	 * mode is on.  This is a countdown timer.
+	 */
+	int                 rtc_us_timer;
+
+	/* Used for sending/receiving data.  +1 for the length. */
+	unsigned char data[IPMI_MAX_MSG_LENGTH + 1];
+	unsigned int  data_len;
+
+	/* Temp receive buffer, gets copied into data. */
+	unsigned char recv[I2C_SMBUS_BLOCK_MAX];
+
+	struct i2c_client *client;
+	ssif_i2c_done done_handler;
+
+	/* Thread interface handling */
+	struct task_struct *thread;
+	struct completion wake_thread;
+	bool stopping;
+	int i2c_read_write;
+	int i2c_command;
+	unsigned char *i2c_data;
+	unsigned int i2c_size;
+
+	/* From the device id response. */
+	struct ipmi_device_id device_id;
+
+	struct timer_list retry_timer;
+	int retries_left;
+
+	/* Info from SSIF cmd */
+	unsigned char max_xmit_msg_size;
+	unsigned char max_recv_msg_size;
+	unsigned int  multi_support;
+	int           supports_pec;
+
+#define SSIF_NO_MULTI		0
+#define SSIF_MULTI_2_PART	1
+#define SSIF_MULTI_n_PART	2
+	unsigned char *multi_data;
+	unsigned int  multi_len;
+	unsigned int  multi_pos;
+
+	atomic_t stats[SSIF_NUM_STATS];
+};
+
+#define ssif_inc_stat(ssif, stat) \
+	atomic_inc(&(ssif)->stats[SSIF_STAT_ ## stat])
+#define ssif_get_stat(ssif, stat) \
+	((unsigned int) atomic_read(&(ssif)->stats[SSIF_STAT_ ## stat]))
+
+static bool initialized;
+
+static atomic_t next_intf = ATOMIC_INIT(0);
+
+static void return_hosed_msg(struct ssif_info *ssif_info,
+			     struct ipmi_smi_msg *msg);
+static void start_next_msg(struct ssif_info *ssif_info, unsigned long *flags);
+static int start_send(struct ssif_info *ssif_info,
+		      unsigned char   *data,
+		      unsigned int    len);
+
+static unsigned long *ipmi_ssif_lock_cond(struct ssif_info *ssif_info,
+					  unsigned long *flags)
+{
+	spin_lock_irqsave(&ssif_info->lock, *flags);
+	return flags;
+}
+
+static void ipmi_ssif_unlock_cond(struct ssif_info *ssif_info,
+				  unsigned long *flags)
+{
+	spin_unlock_irqrestore(&ssif_info->lock, *flags);
+}
+
+static void deliver_recv_msg(struct ssif_info *ssif_info,
+			     struct ipmi_smi_msg *msg)
+{
+	ipmi_smi_t    intf = ssif_info->intf;
+
+	if (!intf) {
+		ipmi_free_smi_msg(msg);
+	} else if (msg->rsp_size < 0) {
+		return_hosed_msg(ssif_info, msg);
+		pr_err(PFX
+		       "Malformed message in deliver_recv_msg: rsp_size = %d\n",
+		       msg->rsp_size);
+	} else {
+		ipmi_smi_msg_received(intf, msg);
+	}
+}
+
+static void return_hosed_msg(struct ssif_info *ssif_info,
+			     struct ipmi_smi_msg *msg)
+{
+	ssif_inc_stat(ssif_info, hosed);
+
+	/* Make it a response */
+	msg->rsp[0] = msg->data[0] | 4;
+	msg->rsp[1] = msg->data[1];
+	msg->rsp[2] = 0xFF; /* Unknown error. */
+	msg->rsp_size = 3;
+
+	deliver_recv_msg(ssif_info, msg);
+}
+
+/*
+ * Must be called with the message lock held.  This will release the
+ * message lock.  Note that the caller will check SSIF_IDLE and start a
+ * new operation, so there is no need to check for new messages to
+ * start in here.
+ */
+static void start_clear_flags(struct ssif_info *ssif_info, unsigned long *flags)
+{
+	unsigned char msg[3];
+
+	ssif_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
+	ssif_info->ssif_state = SSIF_CLEARING_FLAGS;
+	ipmi_ssif_unlock_cond(ssif_info, flags);
+
+	/* Make sure the watchdog pre-timeout flag is not set at startup. */
+	msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
+	msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
+	msg[2] = WDT_PRE_TIMEOUT_INT;
+
+	if (start_send(ssif_info, msg, 3) != 0) {
+		/* Error, just go to normal state. */
+		ssif_info->ssif_state = SSIF_NORMAL;
+	}
+}
+
+static void start_flag_fetch(struct ssif_info *ssif_info, unsigned long *flags)
+{
+	unsigned char mb[2];
+
+	ssif_info->req_flags = false;
+	ssif_info->ssif_state = SSIF_GETTING_FLAGS;
+	ipmi_ssif_unlock_cond(ssif_info, flags);
+
+	mb[0] = (IPMI_NETFN_APP_REQUEST << 2);
+	mb[1] = IPMI_GET_MSG_FLAGS_CMD;
+	if (start_send(ssif_info, mb, 2) != 0)
+		ssif_info->ssif_state = SSIF_NORMAL;
+}
+
+static void check_start_send(struct ssif_info *ssif_info, unsigned long *flags,
+			     struct ipmi_smi_msg *msg)
+{
+	if (start_send(ssif_info, msg->data, msg->data_size) != 0) {
+		unsigned long oflags;
+
+		flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
+		ssif_info->curr_msg = NULL;
+		ssif_info->ssif_state = SSIF_NORMAL;
+		ipmi_ssif_unlock_cond(ssif_info, flags);
+		ipmi_free_smi_msg(msg);
+	}
+}
+
+static void start_event_fetch(struct ssif_info *ssif_info, unsigned long *flags)
+{
+	struct ipmi_smi_msg *msg;
+
+	ssif_info->req_events = false;
+
+	msg = ipmi_alloc_smi_msg();
+	if (!msg) {
+		ssif_info->ssif_state = SSIF_NORMAL;
+		return;
+	}
+
+	ssif_info->curr_msg = msg;
+	ssif_info->ssif_state = SSIF_GETTING_EVENTS;
+	ipmi_ssif_unlock_cond(ssif_info, flags);
+
+	msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2);
+	msg->data[1] = IPMI_READ_EVENT_MSG_BUFFER_CMD;
+	msg->data_size = 2;
+
+	check_start_send(ssif_info, flags, msg);
+}
+
+static void start_recv_msg_fetch(struct ssif_info *ssif_info,
+				 unsigned long *flags)
+{
+	struct ipmi_smi_msg *msg;
+
+	msg = ipmi_alloc_smi_msg();
+	if (!msg) {
+		ssif_info->ssif_state = SSIF_NORMAL;
+		return;
+	}
+
+	ssif_info->curr_msg = msg;
+	ssif_info->ssif_state = SSIF_GETTING_MESSAGES;
+	ipmi_ssif_unlock_cond(ssif_info, flags);
+
+	msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2);
+	msg->data[1] = IPMI_GET_MSG_CMD;
+	msg->data_size = 2;
+
+	check_start_send(ssif_info, flags, msg);
+}
+
+/*
+ * Must be called with the message lock held.  This will release the
+ * message lock.  Note that the caller will check SSIF_IDLE and start a
+ * new operation, so there is no need to check for new messages to
+ * start in here.
+ */
+static void handle_flags(struct ssif_info *ssif_info, unsigned long *flags)
+{
+	if (ssif_info->msg_flags & WDT_PRE_TIMEOUT_INT) {
+		ipmi_smi_t intf = ssif_info->intf;
+		/* Watchdog pre-timeout */
+		ssif_inc_stat(ssif_info, watchdog_pretimeouts);
+		start_clear_flags(ssif_info, flags);
+		if (intf)
+			ipmi_smi_watchdog_pretimeout(intf);
+	} else if (ssif_info->msg_flags & RECEIVE_MSG_AVAIL)
+		/* Messages available. */
+		start_recv_msg_fetch(ssif_info, flags);
+	else if (ssif_info->msg_flags & EVENT_MSG_BUFFER_FULL)
+		/* Events available. */
+		start_event_fetch(ssif_info, flags);
+	else {
+		ssif_info->ssif_state = SSIF_NORMAL;
+		ipmi_ssif_unlock_cond(ssif_info, flags);
+	}
+}
+
+static int ipmi_ssif_thread(void *data)
+{
+	struct ssif_info *ssif_info = data;
+
+	while (!kthread_should_stop()) {
+		int result;
+
+		/* Wait for something to do */
+		wait_for_completion(&ssif_info->wake_thread);
+		init_completion(&ssif_info->wake_thread);
+
+		if (ssif_info->stopping)
+			break;
+
+		if (ssif_info->i2c_read_write == I2C_SMBUS_WRITE) {
+			result = i2c_smbus_write_block_data(
+				ssif_info->client, SSIF_IPMI_REQUEST,
+				ssif_info->i2c_data[0],
+				ssif_info->i2c_data + 1);
+			ssif_info->done_handler(ssif_info, result, NULL, 0);
+		} else {
+			result = i2c_smbus_read_block_data(
+				ssif_info->client, SSIF_IPMI_RESPONSE,
+				ssif_info->i2c_data);
+			if (result < 0)
+				ssif_info->done_handler(ssif_info, result,
+							NULL, 0);
+			else
+				ssif_info->done_handler(ssif_info, 0,
+							ssif_info->i2c_data,
+							result);
+		}
+	}
+
+	return 0;
+}
+
+static int ssif_i2c_send(struct ssif_info *ssif_info,
+			ssif_i2c_done handler,
+			int read_write, int command,
+			unsigned char *data, unsigned int size)
+{
+	ssif_info->done_handler = handler;
+
+	ssif_info->i2c_read_write = read_write;
+	ssif_info->i2c_command = command;
+	ssif_info->i2c_data = data;
+	ssif_info->i2c_size = size;
+	complete(&ssif_info->wake_thread);
+	return 0;
+}
+
+
+static void msg_done_handler(struct ssif_info *ssif_info, int result,
+			     unsigned char *data, unsigned int len);
+
+static void retry_timeout(unsigned long data)
+{
+	struct ssif_info *ssif_info = (void *) data;
+	int rv;
+
+	if (ssif_info->stopping)
+		return;
+
+	ssif_info->rtc_us_timer = 0;
+
+	rv = ssif_i2c_send(ssif_info, msg_done_handler, I2C_SMBUS_READ,
+			  SSIF_IPMI_RESPONSE,
+			  ssif_info->recv, I2C_SMBUS_BLOCK_DATA);
+	if (rv < 0) {
+		/* request failed, just return the error. */
+		if (ssif_info->ssif_debug & SSIF_DEBUG_MSG)
+			pr_info("Error from i2c_non_blocking_op(5)\n");
+
+		msg_done_handler(ssif_info, -EIO, NULL, 0);
+	}
+}
+
+static int start_resend(struct ssif_info *ssif_info);
+
+static void msg_done_handler(struct ssif_info *ssif_info, int result,
+			     unsigned char *data, unsigned int len)
+{
+	struct ipmi_smi_msg *msg;
+	unsigned long oflags, *flags;
+	int rv;
+
+	/*
+	 * We are single-threaded here, so no need for a lock until we
+	 * start messing with driver states or the queues.
+	 */
+
+	if (result < 0) {
+		ssif_info->retries_left--;
+		if (ssif_info->retries_left > 0) {
+			ssif_inc_stat(ssif_info, receive_retries);
+
+			mod_timer(&ssif_info->retry_timer,
+				  jiffies + SSIF_MSG_JIFFIES);
+			ssif_info->rtc_us_timer = SSIF_MSG_USEC;
+			return;
+		}
+
+		ssif_inc_stat(ssif_info, receive_errors);
+
+		if (ssif_info->ssif_debug & SSIF_DEBUG_MSG)
+			pr_info("Error in msg_done_handler: %d\n", result);
+		len = 0;
+		goto continue_op;
+	}
+
+	if ((len > 1) && (ssif_info->multi_pos == 0)
+				&& (data[0] == 0x00) && (data[1] == 0x01)) {
+		/* Start of multi-part read.  Start the next transaction. */
+		int i;
+
+		ssif_inc_stat(ssif_info, received_message_parts);
+
+		/* Remove the multi-part read marker. */
+		for (i = 0; i < (len-2); i++)
+			ssif_info->data[i] = data[i+2];
+		len -= 2;
+		ssif_info->multi_len = len;
+		ssif_info->multi_pos = 1;
+
+		rv = ssif_i2c_send(ssif_info, msg_done_handler, I2C_SMBUS_READ,
+				  SSIF_IPMI_MULTI_PART_RESPONSE_MIDDLE,
+				  ssif_info->recv, I2C_SMBUS_BLOCK_DATA);
+		if (rv < 0) {
+			if (ssif_info->ssif_debug & SSIF_DEBUG_MSG)
+				pr_info("Error from i2c_non_blocking_op(1)\n");
+
+			result = -EIO;
+		} else
+			return;
+	} else if (ssif_info->multi_pos) {
+		/* Middle of multi-part read.  Start the next transaction. */
+		int i;
+		unsigned char blocknum;
+
+		if (len == 0) {
+			result = -EIO;
+			if (ssif_info->ssif_debug & SSIF_DEBUG_MSG)
+				pr_info(PFX "Middle message with no data\n");
+
+			goto continue_op;
+		}
+
+		blocknum = data[ssif_info->multi_len];
+
+		if (ssif_info->multi_len+len-1 > IPMI_MAX_MSG_LENGTH) {
+			/* Received message too big, abort the operation. */
+			result = -E2BIG;
+			if (ssif_info->ssif_debug & SSIF_DEBUG_MSG)
+				pr_info("Received message too big\n");
+
+			goto continue_op;
+		}
+
+		/* Remove the blocknum from the data. */
+		for (i = 0; i < (len-1); i++)
+			ssif_info->data[i+ssif_info->multi_len] = data[i+1];
+		len--;
+		ssif_info->multi_len += len;
+		if (blocknum == 0xff) {
+			/* End of read */
+			len = ssif_info->multi_len;
+			data = ssif_info->data;
+		} else if ((blocknum+1) != ssif_info->multi_pos) {
+			/*
+			 * Out of sequence block, just abort.  Block
+			 * numbers start at zero for the second block,
+			 * but multi_pos starts at one, so the +1.
+			 */
+			result = -EIO;
+		} else {
+			ssif_inc_stat(ssif_info, received_message_parts);
+
+			ssif_info->multi_pos++;
+
+			rv = ssif_i2c_send(ssif_info, msg_done_handler,
+					   I2C_SMBUS_READ,
+					   SSIF_IPMI_MULTI_PART_RESPONSE_MIDDLE,
+					   ssif_info->recv,
+					   I2C_SMBUS_BLOCK_DATA);
+			if (rv < 0) {
+				if (ssif_info->ssif_debug & SSIF_DEBUG_MSG)
+					pr_info(PFX
+						"Error from i2c_non_blocking_op(2)\n");
+
+				result = -EIO;
+			} else
+				return;
+		}
+	}
+
+	if (result < 0) {
+		ssif_inc_stat(ssif_info, receive_errors);
+	} else {
+		ssif_inc_stat(ssif_info, received_messages);
+		ssif_inc_stat(ssif_info, received_message_parts);
+	}
+
+
+ continue_op:
+	if (ssif_info->ssif_debug & SSIF_DEBUG_STATE)
+		pr_info(PFX "DONE 1: state = %d, result=%d.\n",
+			ssif_info->ssif_state, result);
+
+	flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
+	msg = ssif_info->curr_msg;
+	if (msg) {
+		msg->rsp_size = len;
+		if (msg->rsp_size > IPMI_MAX_MSG_LENGTH)
+			msg->rsp_size = IPMI_MAX_MSG_LENGTH;
+		memcpy(msg->rsp, data, msg->rsp_size);
+		ssif_info->curr_msg = NULL;
+	}
+
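+	/*
+	 * Act on the response according to what we were doing when the
+	 * operation was started.
+	 */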
+	switch (ssif_info->ssif_state) {
+	case SSIF_NORMAL:
+		ipmi_ssif_unlock_cond(ssif_info, flags);
+		if (!msg)
+			break;
+
+		if (result < 0)
+			return_hosed_msg(ssif_info, msg);
+		else
+			deliver_recv_msg(ssif_info, msg);
+		break;
+
+	case SSIF_GETTING_FLAGS:
+		/* We got the flags from the SSIF, now handle them. */
+		if ((result < 0) || (len < 4) || (data[2] != 0)) {
+			/*
+			 * Error fetching flags, or invalid length,
+			 * just give up for now.
+			 */
+			ssif_info->ssif_state = SSIF_NORMAL;
+			ipmi_ssif_unlock_cond(ssif_info, flags);
+			pr_warn(PFX "Error getting flags: %d %d, %x\n",
+			       result, len, data[2]);
+		} else if (data[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2
+			   || data[1] != IPMI_GET_MSG_FLAGS_CMD) {
+			pr_warn(PFX "Invalid response getting flags: %x %x\n",
+				data[0], data[1]);
+		} else {
+			ssif_inc_stat(ssif_info, flag_fetches);
+			ssif_info->msg_flags = data[3];
+			handle_flags(ssif_info, flags);
+		}
+		break;
+
+	case SSIF_CLEARING_FLAGS:
+		/* We cleared the flags. */
+		if ((result < 0) || (len < 3) || (data[2] != 0)) {
+			/* Error clearing flags */
+			pr_warn(PFX "Error clearing flags: %d %d, %x\n",
+			       result, len, data[2]);
+		} else if (data[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2
+			   || data[1] != IPMI_CLEAR_MSG_FLAGS_CMD) {
+			pr_warn(PFX "Invalid response clearing flags: %x %x\n",
+				data[0], data[1]);
+		}
+		ssif_info->ssif_state = SSIF_NORMAL;
+		ipmi_ssif_unlock_cond(ssif_info, flags);
+		break;
+
+	case SSIF_GETTING_EVENTS:
+		if ((result < 0) || (len < 3) || (msg->rsp[2] != 0)) {
+			/* Error getting event, probably done. */
+			msg->done(msg);
+
+			/* Take off the event flag. */
+			ssif_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL;
+			handle_flags(ssif_info, flags);
+		} else if (msg->rsp[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2
+			   || msg->rsp[1] != IPMI_READ_EVENT_MSG_BUFFER_CMD) {
+			pr_warn(PFX "Invalid response getting events: %x %x\n",
+				msg->rsp[0], msg->rsp[1]);
+			msg->done(msg);
+			/* Take off the event flag. */
+			ssif_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL;
+			handle_flags(ssif_info, flags);
+		} else {
+			handle_flags(ssif_info, flags);
+			ssif_inc_stat(ssif_info, events);
+			deliver_recv_msg(ssif_info, msg);
+		}
+		break;
+
+	case SSIF_GETTING_MESSAGES:
+		if ((result < 0) || (len < 3) || (msg->rsp[2] != 0)) {
+			/* Error getting message, probably done. */
+			msg->done(msg);
+
+			/* Take off the msg flag. */
+			ssif_info->msg_flags &= ~RECEIVE_MSG_AVAIL;
+			handle_flags(ssif_info, flags);
+		} else if (msg->rsp[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2
+			   || msg->rsp[1] != IPMI_GET_MSG_CMD) {
+			pr_warn(PFX "Invalid response getting messages: %x %x\n",
+				msg->rsp[0], msg->rsp[1]);
+			msg->done(msg);
+
+			/* Take off the msg flag. */
+			ssif_info->msg_flags &= ~RECEIVE_MSG_AVAIL;
+			handle_flags(ssif_info, flags);
+		} else {
+			ssif_inc_stat(ssif_info, incoming_messages);
+			handle_flags(ssif_info, flags);
+			deliver_recv_msg(ssif_info, msg);
+		}
+		break;
+	}
+
+	flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
+	if (SSIF_IDLE(ssif_info) && !ssif_info->stopping) {
+		if (ssif_info->req_events)
+			start_event_fetch(ssif_info, flags);
+		else if (ssif_info->req_flags)
+			start_flag_fetch(ssif_info, flags);
+		else
+			start_next_msg(ssif_info, flags);
+	} else
+		ipmi_ssif_unlock_cond(ssif_info, flags);
+
+	if (ssif_info->ssif_debug & SSIF_DEBUG_STATE)
+		pr_info(PFX "DONE 2: state = %d.\n", ssif_info->ssif_state);
+}
+
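+/*
+ * Called when an SMBus write completes.  On failure the send is
+ * retried; if that is not possible the error is handed off to
+ * msg_done_handler().  On success, either the next chunk of a
+ * multi-part write is sent or the retry timer is armed so the
+ * response gets read after a short delay.
+ */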
+static void msg_written_handler(struct ssif_info *ssif_info, int result,
+				unsigned char *data, unsigned int len)
+{
+	int rv;
+
+	/* We are single-threaded here, so no need for a lock. */
+	if (result < 0) {
+		ssif_info->retries_left--;
+		if (ssif_info->retries_left > 0) {
+			if (!start_resend(ssif_info)) {
+				ssif_inc_stat(ssif_info, send_retries);
+				return;
+			}
+			/* request failed, just return the error. */
+			ssif_inc_stat(ssif_info, send_errors);
+
+			if (ssif_info->ssif_debug & SSIF_DEBUG_MSG)
+				pr_info(PFX
+					"Out of retries in msg_written_handler\n");
+			msg_done_handler(ssif_info, -EIO, NULL, 0);
+			return;
+		}
+
+		ssif_inc_stat(ssif_info, send_errors);
+
+		/*
+		 * Got an error on transmit, let the done routine
+		 * handle it.
+		 */
+		if (ssif_info->ssif_debug & SSIF_DEBUG_MSG)
+			pr_info("Error in msg_written_handler: %d\n", result);
+
+		msg_done_handler(ssif_info, result, NULL, 0);
+		return;
+	}
+
+	if (ssif_info->multi_data) {
+		/* In the middle of a multi-data write. */
+		int left;
+		unsigned char *data_to_send;
+
+		ssif_inc_stat(ssif_info, sent_messages_parts);
+
+		left = ssif_info->multi_len - ssif_info->multi_pos;
+		if (left > 32)
+			left = 32;
+		/* Length byte. */
+		ssif_info->multi_data[ssif_info->multi_pos] = left;
+		/*
+		 * Grab the chunk to send (length byte plus data) before
+		 * multi_pos is advanced and multi_data is possibly cleared
+		 * below.
+		 */
+		data_to_send = ssif_info->multi_data + ssif_info->multi_pos;
+		ssif_info->multi_pos += left;
+		if (left < 32)
+			/*
+			 * Write is finished.  Note that we must end
+			 * with a write of less than 32 bytes to
+			 * complete the transaction, even if it is
+			 * zero bytes.
+			 */
+			ssif_info->multi_data = NULL;
+
+		rv = ssif_i2c_send(ssif_info, msg_written_handler,
+				  I2C_SMBUS_WRITE,
+				  SSIF_IPMI_MULTI_PART_REQUEST_MIDDLE,
+				  data_to_send,
+				  I2C_SMBUS_BLOCK_DATA);
+		if (rv < 0) {
+			/* request failed, just return the error. */
+			ssif_inc_stat(ssif_info, send_errors);
+
+			if (ssif_info->ssif_debug & SSIF_DEBUG_MSG)
+				pr_info("Error from i2c_non_blocking_op(3)\n");
+			msg_done_handler(ssif_info, -EIO, NULL, 0);
+		}
+	} else {
+		ssif_inc_stat(ssif_info, sent_messages);
+		ssif_inc_stat(ssif_info, sent_messages_parts);
+
+		/* Wait a short time, then start the read of the response. */
+		ssif_info->retries_left = SSIF_RECV_RETRIES;
+		ssif_info->rtc_us_timer = SSIF_MSG_PART_USEC;
+		mod_timer(&ssif_info->retry_timer,
+			  jiffies + SSIF_MSG_PART_JIFFIES);
+		return;
+	}
+}
+
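+/*
+ * (Re)start transmission of the message in ssif_info->data.  Messages
+ * longer than 32 bytes are sent as a multi-part write: the first
+ * 32-byte chunk goes out with the multi-part start command and
+ * msg_written_handler() sends the remaining chunks.
+ */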
+static int start_resend(struct ssif_info *ssif_info)
+{
+	int rv;
+	int command;
+
+	if (ssif_info->data_len > 32) {
+		command = SSIF_IPMI_MULTI_PART_REQUEST_START;
+		ssif_info->multi_data = ssif_info->data;
+		ssif_info->multi_len = ssif_info->data_len;
+		/*
+		 * Subtle thing, this is 32, not 33, because we will
+		 * overwrite the thing at position 32 (which was just
+		 * transmitted) with the new length.
+		 */
+		ssif_info->multi_pos = 32;
+		ssif_info->data[0] = 32;
+	} else {
+		ssif_info->multi_data = NULL;
+		command = SSIF_IPMI_REQUEST;
+		ssif_info->data[0] = ssif_info->data_len;
+	}
+
+	rv = ssif_i2c_send(ssif_info, msg_written_handler, I2C_SMBUS_WRITE,
+			  command, ssif_info->data, I2C_SMBUS_BLOCK_DATA);
+	if (rv && (ssif_info->ssif_debug & SSIF_DEBUG_MSG))
+		pr_info("Error from i2c_non_blocking_op(4)\n");
+	return rv;
+}
+
+static int start_send(struct ssif_info *ssif_info,
+		      unsigned char   *data,
+		      unsigned int    len)
+{
+	if (len > IPMI_MAX_MSG_LENGTH)
+		return -E2BIG;
+	if (len > ssif_info->max_xmit_msg_size)
+		return -E2BIG;
+
+	ssif_info->retries_left = SSIF_SEND_RETRIES;
+	memcpy(ssif_info->data+1, data, len);
+	ssif_info->data_len = len;
+	return start_resend(ssif_info);
+}
+
+/* Must be called with the message lock held. */
+static void start_next_msg(struct ssif_info *ssif_info, unsigned long *flags)
+{
+	struct ipmi_smi_msg *msg;
+	unsigned long oflags;
+
+ restart:
+	if (!SSIF_IDLE(ssif_info)) {
+		ipmi_ssif_unlock_cond(ssif_info, flags);
+		return;
+	}
+
+	if (!ssif_info->waiting_msg) {
+		ssif_info->curr_msg = NULL;
+		ipmi_ssif_unlock_cond(ssif_info, flags);
+	} else {
+		int rv;
+
+		ssif_info->curr_msg = ssif_info->waiting_msg;
+		ssif_info->waiting_msg = NULL;
+		ipmi_ssif_unlock_cond(ssif_info, flags);
+		rv = start_send(ssif_info,
+				ssif_info->curr_msg->data,
+				ssif_info->curr_msg->data_size);
+		if (rv) {
+			msg = ssif_info->curr_msg;
+			ssif_info->curr_msg = NULL;
+			return_hosed_msg(ssif_info, msg);
+			flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
+			goto restart;
+		}
+	}
+}
+
+static void sender(void                *send_info,
+		   struct ipmi_smi_msg *msg)
+{
+	struct ssif_info *ssif_info = (struct ssif_info *) send_info;
+	unsigned long oflags, *flags;
+
+	BUG_ON(ssif_info->waiting_msg);
+	ssif_info->waiting_msg = msg;
+
+	flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
+	start_next_msg(ssif_info, flags);
+
+	if (ssif_info->ssif_debug & SSIF_DEBUG_TIMING) {
+		struct timeval t;
+
+		do_gettimeofday(&t);
+		pr_info("**Enqueue %02x %02x: %ld.%6.6ld\n",
+		       msg->data[0], msg->data[1], t.tv_sec, t.tv_usec);
+	}
+}
+
+static int get_smi_info(void *send_info, struct ipmi_smi_info *data)
+{
+	struct ssif_info *ssif_info = send_info;
+
+	data->addr_src = ssif_info->addr_source;
+	data->dev = &ssif_info->client->dev;
+	data->addr_info = ssif_info->addr_info;
+	get_device(data->dev);
+
+	return 0;
+}
+
+/*
+ * Instead of having our own timer to periodically check the message
+ * flags, we let the message handler drive us.
+ */
+static void request_events(void *send_info)
+{
+	struct ssif_info *ssif_info = (struct ssif_info *) send_info;
+	unsigned long oflags, *flags;
+
+	if (!ssif_info->has_event_buffer)
+		return;
+
+	flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
+	/*
+	 * Request flags first, not events, because the lower layer
+	 * doesn't have a way to send an attention.  But make sure
+	 * event checking still happens.
+	 */
+	ssif_info->req_events = true;
+	if (SSIF_IDLE(ssif_info))
+		start_flag_fetch(ssif_info, flags);
+	else {
+		ssif_info->req_flags = true;
+		ipmi_ssif_unlock_cond(ssif_info, flags);
+	}
+}
+
+static int inc_usecount(void *send_info)
+{
+	struct ssif_info *ssif_info = send_info;
+
+	if (!i2c_get_adapter(ssif_info->client->adapter->nr))
+		return -ENODEV;
+
+	i2c_use_client(ssif_info->client);
+	return 0;
+}
+
+static void dec_usecount(void *send_info)
+{
+	struct ssif_info *ssif_info = send_info;
+
+	i2c_release_client(ssif_info->client);
+	i2c_put_adapter(ssif_info->client->adapter);
+}
+
+static int ssif_start_processing(void *send_info,
+				 ipmi_smi_t intf)
+{
+	struct ssif_info *ssif_info = send_info;
+
+	ssif_info->intf = intf;
+
+	return 0;
+}
+
+#define MAX_SSIF_BMCS 4
+
+static unsigned short addr[MAX_SSIF_BMCS];
+static int num_addrs;
+module_param_array(addr, ushort, &num_addrs, 0);
+MODULE_PARM_DESC(addr, "The addresses to scan for IPMI BMCs on the SSIFs.");
+
+static char *adapter_name[MAX_SSIF_BMCS];
+static int num_adapter_names;
+module_param_array(adapter_name, charp, &num_adapter_names, 0);
+MODULE_PARM_DESC(adapter_name, "The string name of the I2C device that has the BMC.  By default all devices are scanned.");
+
+static int slave_addrs[MAX_SSIF_BMCS];
+static int num_slave_addrs;
+module_param_array(slave_addrs, int, &num_slave_addrs, 0);
+MODULE_PARM_DESC(slave_addrs,
+		 "The default IPMB slave address for the controller.");
+
+/*
+ * Bit 0 enables message debugging, bit 1 enables state debugging, and
+ * bit 2 enables timing debugging.  This is an array indexed by
+ * interface number.
+ */
+static int dbg[MAX_SSIF_BMCS];
+static int num_dbg;
+module_param_array(dbg, int, &num_dbg, 0);
+MODULE_PARM_DESC(dbg, "Turn on debugging.");
+
+static bool ssif_dbg_probe;
+module_param_named(dbg_probe, ssif_dbg_probe, bool, 0);
+MODULE_PARM_DESC(dbg_probe, "Enable debugging of probing of adapters.");
+
+static int use_thread;
+module_param(use_thread, int, 0);
+MODULE_PARM_DESC(use_thread, "Use the thread interface.");
+
+static bool ssif_tryacpi = 1;
+module_param_named(tryacpi, ssif_tryacpi, bool, 0);
+MODULE_PARM_DESC(tryacpi, "Setting this to zero will disable the default scan of the interfaces identified via ACPI");
+
+static bool ssif_trydmi = 1;
+module_param_named(trydmi, ssif_trydmi, bool, 0);
+MODULE_PARM_DESC(trydmi, "Setting this to zero will disable the default scan of the interfaces identified via DMI (SMBIOS)");
+
+static DEFINE_MUTEX(ssif_infos_mutex);
+static LIST_HEAD(ssif_infos);
+
+static int ssif_remove(struct i2c_client *client)
+{
+	struct ssif_info *ssif_info = i2c_get_clientdata(client);
+	int rv;
+
+	if (!ssif_info)
+		return 0;
+
+	i2c_set_clientdata(client, NULL);
+
+	/*
+	 * After this point, we won't deliver anything asynchronously
+	 * to the message handler.  We can unregister ourselves.
+	 */
+	rv = ipmi_unregister_smi(ssif_info->intf);
+	if (rv) {
+		pr_err(PFX "Unable to unregister device: errno=%d\n", rv);
+		return rv;
+	}
+	ssif_info->intf = NULL;
+
+	/* make sure the driver is not looking for flags any more. */
+	while (ssif_info->ssif_state != SSIF_NORMAL)
+		schedule_timeout(1);
+
+	ssif_info->stopping = true;
+	del_timer_sync(&ssif_info->retry_timer);
+	if (ssif_info->thread) {
+		complete(&ssif_info->wake_thread);
+		kthread_stop(ssif_info->thread);
+	}
+
+	/*
+	 * No message can be outstanding now, we have removed the
+	 * upper layer and it permitted us to do so.
+	 */
+	kfree(ssif_info);
+	return 0;
+}
+
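+/*
+ * Synchronous request/response helper used at detect/probe time:
+ * write the request (with retries), poll for the response, and check
+ * that the response matches the request's netfn and command.
+ */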
+static int do_cmd(struct i2c_client *client, int len, unsigned char *msg,
+		  int *resp_len, unsigned char *resp)
+{
+	int retry_cnt;
+	int ret;
+
+	retry_cnt = SSIF_SEND_RETRIES;
+ retry1:
+	ret = i2c_smbus_write_block_data(client, SSIF_IPMI_REQUEST, len, msg);
+	if (ret) {
+		retry_cnt--;
+		if (retry_cnt > 0)
+			goto retry1;
+		return -ENODEV;
+	}
+
+	ret = -ENODEV;
+	retry_cnt = SSIF_RECV_RETRIES;
+	while (retry_cnt > 0) {
+		ret = i2c_smbus_read_block_data(client, SSIF_IPMI_RESPONSE,
+						resp);
+		if (ret > 0)
+			break;
+		msleep(SSIF_MSG_MSEC);
+		retry_cnt--;
+		if (retry_cnt <= 0)
+			break;
+	}
+
+	if (ret > 0) {
+		/* Validate that the response is correct. */
+		if (ret < 3 ||
+		    (resp[0] != (msg[0] | (1 << 2))) ||
+		    (resp[1] != msg[1]))
+			ret = -EINVAL;
+		else {
+			*resp_len = ret;
+			ret = 0;
+		}
+	}
+
+	return ret;
+}
+
+static int ssif_detect(struct i2c_client *client, struct i2c_board_info *info)
+{
+	unsigned char *resp;
+	unsigned char msg[3];
+	int           rv;
+	int           len;
+
+	resp = kmalloc(IPMI_MAX_MSG_LENGTH, GFP_KERNEL);
+	if (!resp)
+		return -ENOMEM;
+
+	/* Do a Get Device ID command, since it is required. */
+	msg[0] = IPMI_NETFN_APP_REQUEST << 2;
+	msg[1] = IPMI_GET_DEVICE_ID_CMD;
+	rv = do_cmd(client, 2, msg, &len, resp);
+	if (rv)
+		rv = -ENODEV;
+	else
+		strlcpy(info->type, DEVICE_NAME, I2C_NAME_SIZE);
+	kfree(resp);
+	return rv;
+}
+
+static int smi_type_proc_show(struct seq_file *m, void *v)
+{
+	return seq_puts(m, "ssif\n");
+}
+
+static int smi_type_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, smi_type_proc_show, inode->i_private);
+}
+
+static const struct file_operations smi_type_proc_ops = {
+	.open		= smi_type_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int smi_stats_proc_show(struct seq_file *m, void *v)
+{
+	struct ssif_info *ssif_info = m->private;
+
+	seq_printf(m, "sent_messages:          %u\n",
+		   ssif_get_stat(ssif_info, sent_messages));
+	seq_printf(m, "sent_messages_parts:    %u\n",
+		   ssif_get_stat(ssif_info, sent_messages_parts));
+	seq_printf(m, "send_retries:           %u\n",
+		   ssif_get_stat(ssif_info, send_retries));
+	seq_printf(m, "send_errors:            %u\n",
+		   ssif_get_stat(ssif_info, send_errors));
+	seq_printf(m, "received_messages:      %u\n",
+		   ssif_get_stat(ssif_info, received_messages));
+	seq_printf(m, "received_message_parts: %u\n",
+		   ssif_get_stat(ssif_info, received_message_parts));
+	seq_printf(m, "receive_retries:        %u\n",
+		   ssif_get_stat(ssif_info, receive_retries));
+	seq_printf(m, "receive_errors:         %u\n",
+		   ssif_get_stat(ssif_info, receive_errors));
+	seq_printf(m, "flag_fetches:           %u\n",
+		   ssif_get_stat(ssif_info, flag_fetches));
+	seq_printf(m, "hosed:                  %u\n",
+		   ssif_get_stat(ssif_info, hosed));
+	seq_printf(m, "events:                 %u\n",
+		   ssif_get_stat(ssif_info, events));
+	seq_printf(m, "watchdog_pretimeouts:   %u\n",
+		   ssif_get_stat(ssif_info, watchdog_pretimeouts));
+	return 0;
+}
+
+static int smi_stats_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, smi_stats_proc_show, PDE_DATA(inode));
+}
+
+static const struct file_operations smi_stats_proc_ops = {
+	.open		= smi_stats_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
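+/*
+ * Find a registered address entry for the given I2C address.  An
+ * entry with a matching adapter name is preferred; if match_null_name
+ * is set and no such entry exists, fall back to an entry that was
+ * registered without an adapter name.
+ */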
+static struct ssif_addr_info *ssif_info_find(unsigned short addr,
+					     char *adapter_name,
+					     bool match_null_name)
+{
+	struct ssif_addr_info *info, *found = NULL;
+
+restart:
+	list_for_each_entry(info, &ssif_infos, link) {
+		if (info->binfo.addr == addr) {
+			if (info->adapter_name || adapter_name) {
+				if (!info->adapter_name != !adapter_name) {
+					/* One is NULL and one is not */
+					continue;
+				}
+				if (strcmp(info->adapter_name, adapter_name))
+					/* Names do not match */
+					continue;
+			}
+			found = info;
+			break;
+		}
+	}
+
+	if (!found && match_null_name) {
+		/* Try to get an exact match first, then try with a NULL name */
+		adapter_name = NULL;
+		match_null_name = false;
+		goto restart;
+	}
+
+	return found;
+}
+
+static bool check_acpi(struct ssif_info *ssif_info, struct device *dev)
+{
+#ifdef CONFIG_ACPI
+	acpi_handle acpi_handle;
+
+	acpi_handle = ACPI_HANDLE(dev);
+	if (acpi_handle) {
+		ssif_info->addr_source = SI_ACPI;
+		ssif_info->addr_info.acpi_info.acpi_handle = acpi_handle;
+		return true;
+	}
+#endif
+	return false;
+}
+
+static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+	unsigned char     msg[3];
+	unsigned char     *resp;
+	struct ssif_info   *ssif_info;
+	int               rv = 0;
+	int               len;
+	int               i;
+	u8		  slave_addr = 0;
+	struct ssif_addr_info *addr_info = NULL;
+
+
+	resp = kmalloc(IPMI_MAX_MSG_LENGTH, GFP_KERNEL);
+	if (!resp)
+		return -ENOMEM;
+
+	ssif_info = kzalloc(sizeof(*ssif_info), GFP_KERNEL);
+	if (!ssif_info) {
+		kfree(resp);
+		return -ENOMEM;
+	}
+
+	if (!check_acpi(ssif_info, &client->dev)) {
+		addr_info = ssif_info_find(client->addr, client->adapter->name,
+					   true);
+		if (!addr_info) {
+			/* Must have come in through sysfs. */
+			ssif_info->addr_source = SI_HOTMOD;
+		} else {
+			ssif_info->addr_source = addr_info->addr_src;
+			ssif_info->ssif_debug = addr_info->debug;
+			ssif_info->addr_info = addr_info->addr_info;
+			slave_addr = addr_info->slave_addr;
+		}
+	}
+
+	pr_info(PFX "Trying %s-specified SSIF interface at i2c address 0x%x, adapter %s, slave address 0x%x\n",
+	       ipmi_addr_src_to_str(ssif_info->addr_source),
+	       client->addr, client->adapter->name, slave_addr);
+
+	/*
+	 * Do a Get Device ID command, since it comes back with some
+	 * useful info.
+	 */
+	msg[0] = IPMI_NETFN_APP_REQUEST << 2;
+	msg[1] = IPMI_GET_DEVICE_ID_CMD;
+	rv = do_cmd(client, 2, msg, &len, resp);
+	if (rv)
+		goto out;
+
+	rv = ipmi_demangle_device_id(resp, len, &ssif_info->device_id);
+	if (rv)
+		goto out;
+
+	ssif_info->client = client;
+	i2c_set_clientdata(client, ssif_info);
+
+	/* Now check for system interface capabilities */
+	msg[0] = IPMI_NETFN_APP_REQUEST << 2;
+	msg[1] = IPMI_GET_SYSTEM_INTERFACE_CAPABILITIES_CMD;
+	msg[2] = 0; /* SSIF */
+	rv = do_cmd(client, 3, msg, &len, resp);
+	if (!rv && (len >= 3) && (resp[2] == 0)) {
+		if (len < 7) {
+			if (ssif_dbg_probe)
+				pr_info(PFX "SSIF info too short: %d\n", len);
+			goto no_support;
+		}
+
+		/* Got a good SSIF response, handle it. */
+		ssif_info->max_xmit_msg_size = resp[5];
+		ssif_info->max_recv_msg_size = resp[6];
+		ssif_info->multi_support = (resp[4] >> 6) & 0x3;
+		ssif_info->supports_pec = (resp[4] >> 3) & 0x1;
+
+		/* Sanitize the data */
+		switch (ssif_info->multi_support) {
+		case SSIF_NO_MULTI:
+			if (ssif_info->max_xmit_msg_size > 32)
+				ssif_info->max_xmit_msg_size = 32;
+			if (ssif_info->max_recv_msg_size > 32)
+				ssif_info->max_recv_msg_size = 32;
+			break;
+
+		case SSIF_MULTI_2_PART:
+			if (ssif_info->max_xmit_msg_size > 64)
+				ssif_info->max_xmit_msg_size = 64;
+			if (ssif_info->max_recv_msg_size > 62)
+				ssif_info->max_recv_msg_size = 62;
+			break;
+
+		case SSIF_MULTI_n_PART:
+			break;
+
+		default:
+			/* Data is not sane, just give up. */
+			goto no_support;
+		}
+	} else {
+ no_support:
+		/* Assume no multi-part or PEC support */
+		pr_info(PFX "Error fetching SSIF: %d %d %2.2x, your system probably doesn't support this command so using defaults\n",
+		       rv, len, resp[2]);
+
+		ssif_info->max_xmit_msg_size = 32;
+		ssif_info->max_recv_msg_size = 32;
+		ssif_info->multi_support = SSIF_NO_MULTI;
+		ssif_info->supports_pec = 0;
+	}
+
+	/* Make sure the NMI timeout is cleared. */
+	msg[0] = IPMI_NETFN_APP_REQUEST << 2;
+	msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
+	msg[2] = WDT_PRE_TIMEOUT_INT;
+	rv = do_cmd(client, 3, msg, &len, resp);
+	if (rv || (len < 3) || (resp[2] != 0))
+		pr_warn(PFX "Unable to clear message flags: %d %d %2.2x\n",
+			rv, len, resp[2]);
+
+	/* Attempt to enable the event buffer. */
+	msg[0] = IPMI_NETFN_APP_REQUEST << 2;
+	msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
+	rv = do_cmd(client, 2, msg, &len, resp);
+	if (rv || (len < 4) || (resp[2] != 0)) {
+		pr_warn(PFX "Error getting global enables: %d %d %2.2x\n",
+			rv, len, resp[2]);
+		rv = 0; /* Not fatal */
+		goto found;
+	}
+
+	if (resp[3] & IPMI_BMC_EVT_MSG_BUFF) {
+		ssif_info->has_event_buffer = true;
+		/* buffer is already enabled, nothing to do. */
+		goto found;
+	}
+
+	msg[0] = IPMI_NETFN_APP_REQUEST << 2;
+	msg[1] = IPMI_SET_BMC_GLOBAL_ENABLES_CMD;
+	msg[2] = resp[3] | IPMI_BMC_EVT_MSG_BUFF;
+	rv = do_cmd(client, 3, msg, &len, resp);
+	if (rv || (len < 2)) {
+		pr_warn(PFX "Error setting global enables: %d %d %2.2x\n",
+			rv, len, resp[2]);
+		rv = 0; /* Not fatal */
+		goto found;
+	}
+
+	if (resp[2] == 0)
+		/* A successful return means the event buffer is supported. */
+		ssif_info->has_event_buffer = true;
+
+ found:
+	ssif_info->intf_num = atomic_inc_return(&next_intf);
+
+	if (ssif_dbg_probe) {
+		pr_info("ssif_probe: i2c_probe found device at i2c address %x\n",
+			client->addr);
+	}
+
+	spin_lock_init(&ssif_info->lock);
+	ssif_info->ssif_state = SSIF_NORMAL;
+	init_timer(&ssif_info->retry_timer);
+	ssif_info->retry_timer.data = (unsigned long) ssif_info;
+	ssif_info->retry_timer.function = retry_timeout;
+
+	for (i = 0; i < SSIF_NUM_STATS; i++)
+		atomic_set(&ssif_info->stats[i], 0);
+
+	if (ssif_info->supports_pec)
+		ssif_info->client->flags |= I2C_CLIENT_PEC;
+
+	ssif_info->handlers.owner = THIS_MODULE;
+	ssif_info->handlers.start_processing = ssif_start_processing;
+	ssif_info->handlers.get_smi_info = get_smi_info;
+	ssif_info->handlers.sender = sender;
+	ssif_info->handlers.request_events = request_events;
+	ssif_info->handlers.inc_usecount = inc_usecount;
+	ssif_info->handlers.dec_usecount = dec_usecount;
+
+	{
+		unsigned int thread_num;
+
+		thread_num = ((ssif_info->client->adapter->nr << 8) |
+			      ssif_info->client->addr);
+		init_completion(&ssif_info->wake_thread);
+		ssif_info->thread = kthread_run(ipmi_ssif_thread, ssif_info,
+					       "kssif%4.4x", thread_num);
+		if (IS_ERR(ssif_info->thread)) {
+			rv = PTR_ERR(ssif_info->thread);
+			dev_notice(&ssif_info->client->dev,
+				   "Could not start kernel thread: error %d\n",
+				   rv);
+			goto out;
+		}
+	}
+
+	rv = ipmi_register_smi(&ssif_info->handlers,
+			       ssif_info,
+			       &ssif_info->device_id,
+			       &ssif_info->client->dev,
+			       slave_addr);
+	if (rv) {
+		pr_err(PFX "Unable to register device: error %d\n", rv);
+		goto out;
+	}
+
+	rv = ipmi_smi_add_proc_entry(ssif_info->intf, "type",
+				     &smi_type_proc_ops,
+				     ssif_info);
+	if (rv) {
+		pr_err(PFX "Unable to create proc entry: %d\n", rv);
+		goto out_err_unreg;
+	}
+
+	rv = ipmi_smi_add_proc_entry(ssif_info->intf, "ssif_stats",
+				     &smi_stats_proc_ops,
+				     ssif_info);
+	if (rv) {
+		pr_err(PFX "Unable to create proc entry: %d\n", rv);
+		goto out_err_unreg;
+	}
+
+ out:
+	if (rv)
+		kfree(ssif_info);
+	kfree(resp);
+	return rv;
+
+ out_err_unreg:
+	ipmi_unregister_smi(ssif_info->intf);
+	goto out;
+}
+
+static int ssif_adapter_handler(struct device *adev, void *opaque)
+{
+	struct ssif_addr_info *addr_info = opaque;
+
+	if (adev->type != &i2c_adapter_type)
+		return 0;
+
+	i2c_new_device(to_i2c_adapter(adev), &addr_info->binfo);
+
+	if (!addr_info->adapter_name)
+		return 1; /* Only try the first I2C adapter by default. */
+	return 0;
+}
+
+static int new_ssif_client(int addr, char *adapter_name,
+			   int debug, int slave_addr,
+			   enum ipmi_addr_src addr_src)
+{
+	struct ssif_addr_info *addr_info;
+	int rv = 0;
+
+	mutex_lock(&ssif_infos_mutex);
+	if (ssif_info_find(addr, adapter_name, false)) {
+		rv = -EEXIST;
+		goto out_unlock;
+	}
+
+	addr_info = kzalloc(sizeof(*addr_info), GFP_KERNEL);
+	if (!addr_info) {
+		rv = -ENOMEM;
+		goto out_unlock;
+	}
+
+	if (adapter_name) {
+		addr_info->adapter_name = kstrdup(adapter_name, GFP_KERNEL);
+		if (!addr_info->adapter_name) {
+			kfree(addr_info);
+			rv = -ENOMEM;
+			goto out_unlock;
+		}
+	}
+
+	strncpy(addr_info->binfo.type, DEVICE_NAME,
+		sizeof(addr_info->binfo.type));
+	addr_info->binfo.addr = addr;
+	addr_info->binfo.platform_data = addr_info;
+	addr_info->debug = debug;
+	addr_info->slave_addr = slave_addr;
+	addr_info->addr_src = addr_src;
+
+	list_add_tail(&addr_info->link, &ssif_infos);
+
+	if (initialized)
+		i2c_for_each_dev(addr_info, ssif_adapter_handler);
+	/* Otherwise address list will get it */
+
+out_unlock:
+	mutex_unlock(&ssif_infos_mutex);
+	return rv;
+}
+
+static void free_ssif_clients(void)
+{
+	struct ssif_addr_info *info, *tmp;
+
+	mutex_lock(&ssif_infos_mutex);
+	list_for_each_entry_safe(info, tmp, &ssif_infos, link) {
+		list_del(&info->link);
+		kfree(info->adapter_name);
+		kfree(info);
+	}
+	mutex_unlock(&ssif_infos_mutex);
+}
+
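+/*
+ * Build the I2C_CLIENT_END-terminated list of addresses that is
+ * handed to the I2C core for device detection.
+ */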
+static unsigned short *ssif_address_list(void)
+{
+	struct ssif_addr_info *info;
+	unsigned int count = 0, i;
+	unsigned short *address_list;
+
+	list_for_each_entry(info, &ssif_infos, link)
+		count++;
+
+	address_list = kzalloc(sizeof(*address_list) * (count + 1), GFP_KERNEL);
+	if (!address_list)
+		return NULL;
+
+	i = 0;
+	list_for_each_entry(info, &ssif_infos, link) {
+		unsigned short addr = info->binfo.addr;
+		int j;
+
+		for (j = 0; j < i; j++) {
+			if (address_list[j] == addr)
+				goto skip_addr;
+		}
+		address_list[i] = addr;
+		i++;
+skip_addr:
+		continue;
+	}
+	address_list[i] = I2C_CLIENT_END;
+
+	return address_list;
+}
+
+#ifdef CONFIG_ACPI
+static struct acpi_device_id ssif_acpi_match[] = {
+	{ "IPI0001", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, ssif_acpi_match);
+
+/*
+ * Once we get an ACPI failure, we don't try any more, because we go
+ * through the tables sequentially.  Once we don't find a table, there
+ * are no more.
+ */
+static int acpi_failure;
+
+/*
+ * Defined in the IPMI 2.0 spec.
+ */
+struct SPMITable {
+	s8	Signature[4];
+	u32	Length;
+	u8	Revision;
+	u8	Checksum;
+	s8	OEMID[6];
+	s8	OEMTableID[8];
+	s8	OEMRevision[4];
+	s8	CreatorID[4];
+	s8	CreatorRevision[4];
+	u8	InterfaceType;
+	u8	IPMIlegacy;
+	s16	SpecificationRevision;
+
+	/*
+	 * Bit 0 - SCI interrupt supported
+	 * Bit 1 - I/O APIC/SAPIC
+	 */
+	u8	InterruptType;
+
+	/*
+	 * If bit 0 of InterruptType is set, then this is the SCI
+	 * interrupt in the GPEx_STS register.
+	 */
+	u8	GPE;
+
+	s16	Reserved;
+
+	/*
+	 * If bit 1 of InterruptType is set, then this is the I/O
+	 * APIC/SAPIC interrupt.
+	 */
+	u32	GlobalSystemInterrupt;
+
+	/* The actual register address. */
+	struct acpi_generic_address addr;
+
+	u8	UID[4];
+
+	s8      spmi_id[1]; /* A '\0' terminated array starts here. */
+};
+
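+/*
+ * Register an SSIF client described by an ACPI SPMI table entry.
+ * Interface type 4 is SSIF, and the table stores the SMBus address
+ * shifted left by one bit.
+ */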
+static int try_init_spmi(struct SPMITable *spmi)
+{
+	unsigned short myaddr;
+
+	if (num_addrs >= MAX_SSIF_BMCS)
+		return -1;
+
+	if (spmi->IPMIlegacy != 1) {
+		pr_warn("IPMI: Bad SPMI legacy: %d\n", spmi->IPMIlegacy);
+		return -ENODEV;
+	}
+
+	if (spmi->InterfaceType != 4)
+		return -ENODEV;
+
+	if (spmi->addr.space_id != ACPI_ADR_SPACE_SMBUS) {
+		pr_warn(PFX "Invalid ACPI SSIF I/O Address type: %d\n",
+			spmi->addr.space_id);
+		return -EIO;
+	}
+
+	myaddr = spmi->addr.address >> 1;
+
+	return new_ssif_client(myaddr, NULL, 0, 0, SI_SPMI);
+}
+
+static void spmi_find_bmc(void)
+{
+	acpi_status      status;
+	struct SPMITable *spmi;
+	int              i;
+
+	if (acpi_disabled)
+		return;
+
+	if (acpi_failure)
+		return;
+
+	for (i = 0; ; i++) {
+		status = acpi_get_table(ACPI_SIG_SPMI, i+1,
+					(struct acpi_table_header **)&spmi);
+		if (status != AE_OK)
+			return;
+
+		try_init_spmi(spmi);
+	}
+}
+#else
+static void spmi_find_bmc(void) { }
+#endif
+
+#ifdef CONFIG_DMI
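+/*
+ * Register an SSIF client from an SMBIOS IPMI device record.  Only
+ * interface type 4 (SSIF) is handled; the I2C address is stored
+ * shifted left by one bit, and some broken systems put it in the
+ * slave address field instead (handled below).
+ */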
+static int decode_dmi(const struct dmi_device *dmi_dev)
+{
+	struct dmi_header *dm = dmi_dev->device_data;
+	u8             *data = (u8 *) dm;
+	u8             len = dm->length;
+	unsigned short myaddr;
+	int            slave_addr;
+
+	if (num_addrs >= MAX_SSIF_BMCS)
+		return -1;
+
+	if (len < 9)
+		return -1;
+
+	if (data[0x04] != 4) /* Not SSIF */
+		return -1;
+
+	if ((data[8] >> 1) == 0) {
+		/*
+		 * Some broken systems put the I2C address in
+		 * the slave address field.  We try to
+		 * accommodate them here.
+		 */
+		myaddr = data[6] >> 1;
+		slave_addr = 0;
+	} else {
+		myaddr = data[8] >> 1;
+		slave_addr = data[6];
+	}
+
+	return new_ssif_client(myaddr, NULL, 0, slave_addr, SI_SMBIOS);
+}
+
+static void dmi_iterator(void)
+{
+	const struct dmi_device *dev = NULL;
+
+	while ((dev = dmi_find_device(DMI_DEV_TYPE_IPMI, NULL, dev)))
+		decode_dmi(dev);
+}
+#else
+static void dmi_iterator(void) { }
+#endif
+
+static const struct i2c_device_id ssif_id[] = {
+	{ DEVICE_NAME, 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, ssif_id);
+
+static struct i2c_driver ssif_i2c_driver = {
+	.class		= I2C_CLASS_HWMON,
+	.driver		= {
+		.owner			= THIS_MODULE,
+		.name			= DEVICE_NAME
+	},
+	.probe		= ssif_probe,
+	.remove		= ssif_remove,
+	.id_table	= ssif_id,
+	.detect		= ssif_detect
+};
+
+static int init_ipmi_ssif(void)
+{
+	int i;
+	int rv;
+
+	if (initialized)
+		return 0;
+
+	pr_info("IPMI SSIF Interface driver\n");
+
+	/* build list for i2c from addr list */
+	for (i = 0; i < num_addrs; i++) {
+		rv = new_ssif_client(addr[i], adapter_name[i],
+				     dbg[i], slave_addrs[i],
+				     SI_HARDCODED);
+		if (rv)
+			pr_err(PFX
+			       "Couldn't add hardcoded device at addr 0x%x\n",
+			       addr[i]);
+	}
+
+	if (ssif_tryacpi)
+		ssif_i2c_driver.driver.acpi_match_table	=
+			ACPI_PTR(ssif_acpi_match);
+	if (ssif_trydmi)
+		dmi_iterator();
+	if (ssif_tryacpi)
+		spmi_find_bmc();
+
+	ssif_i2c_driver.address_list = ssif_address_list();
+
+	rv = i2c_add_driver(&ssif_i2c_driver);
+	if (!rv)
+		initialized = true;
+
+	return rv;
+}
+module_init(init_ipmi_ssif);
+
+static void cleanup_ipmi_ssif(void)
+{
+	if (!initialized)
+		return;
+
+	initialized = false;
+
+	i2c_del_driver(&ssif_i2c_driver);
+
+	free_ssif_clients();
+}
+module_exit(cleanup_ipmi_ssif);
+
+MODULE_AUTHOR("Todd C Davis <todd.c.davis@intel.com>, Corey Minyard <minyard@acm.org>");
+MODULE_DESCRIPTION("IPMI driver for management controllers on an SMBus");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c
index 7bb9d65..e554111 100644
--- a/drivers/dma-buf/fence.c
+++ b/drivers/dma-buf/fence.c
@@ -283,7 +283,7 @@
  * @cb:		[in]	the callback to remove
  *
  * Remove a previously queued callback from the fence. This function returns
- * true if the callback is succesfully removed, or false if the fence has
+ * true if the callback is successfully removed, or false if the fence has
  * already been signaled.
  *
  * *WARNING*:
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index de46982..f2b2c4e 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -107,6 +107,13 @@
 	help
 	  Support the Atmel AHB DMA controller.
 
+config AT_XDMAC
+	tristate "Atmel XDMA support"
+	depends on ARCH_AT91
+	select DMA_ENGINE
+	help
+	  Support the Atmel XDMA controller.
+
 config FSL_DMA
 	tristate "Freescale Elo series DMA support"
 	depends on FSL_SOC
@@ -395,12 +402,12 @@
 
 config DMA_SUN6I
 	tristate "Allwinner A31 SoCs DMA support"
-	depends on MACH_SUN6I || COMPILE_TEST
+	depends on MACH_SUN6I || MACH_SUN8I || COMPILE_TEST
 	depends on RESET_CONTROLLER
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
 	help
-	  Support for the DMA engine for Allwinner A31 SoCs.
+	  Support for the DMA engine first found in Allwinner A31 SoCs.
 
 config NBPFAXI_DMA
 	tristate "Renesas Type-AXI NBPF DMA support"
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index cb626c1..2022b54 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -16,6 +16,7 @@
 obj-$(CONFIG_MV_XOR) += mv_xor.o
 obj-$(CONFIG_DW_DMAC_CORE) += dw/
 obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
+obj-$(CONFIG_AT_XDMAC) += at_xdmac.o
 obj-$(CONFIG_MX3_IPU) += ipu/
 obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
 obj-$(CONFIG_SH_DMAE_BASE) += sh/
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index e34024b..1364d00 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -2164,7 +2164,6 @@
 			 __func__, ret);
 		goto out_no_memcpy;
 	}
-	pl08x->memcpy.chancnt = ret;
 
 	/* Register slave channels */
 	ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave,
@@ -2175,7 +2174,6 @@
 				__func__, ret);
 		goto out_no_slave;
 	}
-	pl08x->slave.chancnt = ret;
 
 	ret = dma_async_device_register(&pl08x->memcpy);
 	if (ret) {
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
new file mode 100644
index 0000000..b60d77a2
--- /dev/null
+++ b/drivers/dma/at_xdmac.c
@@ -0,0 +1,1524 @@
+/*
+ * Driver for the Atmel Extensible DMA Controller (aka XDMAC on AT91 systems)
+ *
+ * Copyright (C) 2014 Atmel Corporation
+ *
+ * Author: Ludovic Desroches <ludovic.desroches@atmel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <asm/barrier.h>
+#include <dt-bindings/dma/at91.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+
+#include "dmaengine.h"
+
+/* Global registers */
+#define AT_XDMAC_GTYPE		0x00	/* Global Type Register */
+#define		AT_XDMAC_NB_CH(i)	(((i) & 0x1F) + 1)		/* Number of Channels Minus One */
+#define		AT_XDMAC_FIFO_SZ(i)	(((i) >> 5) & 0x7FF)		/* Number of Bytes */
+#define		AT_XDMAC_NB_REQ(i)	((((i) >> 16) & 0x3F) + 1)	/* Number of Peripheral Requests Minus One */
+#define AT_XDMAC_GCFG		0x04	/* Global Configuration Register */
+#define AT_XDMAC_GWAC		0x08	/* Global Weighted Arbiter Configuration Register */
+#define AT_XDMAC_GIE		0x0C	/* Global Interrupt Enable Register */
+#define AT_XDMAC_GID		0x10	/* Global Interrupt Disable Register */
+#define AT_XDMAC_GIM		0x14	/* Global Interrupt Mask Register */
+#define AT_XDMAC_GIS		0x18	/* Global Interrupt Status Register */
+#define AT_XDMAC_GE		0x1C	/* Global Channel Enable Register */
+#define AT_XDMAC_GD		0x20	/* Global Channel Disable Register */
+#define AT_XDMAC_GS		0x24	/* Global Channel Status Register */
+#define AT_XDMAC_GRS		0x28	/* Global Channel Read Suspend Register */
+#define AT_XDMAC_GWS		0x2C	/* Global Write Suspend Register */
+#define AT_XDMAC_GRWS		0x30	/* Global Channel Read Write Suspend Register */
+#define AT_XDMAC_GRWR		0x34	/* Global Channel Read Write Resume Register */
+#define AT_XDMAC_GSWR		0x38	/* Global Channel Software Request Register */
+#define AT_XDMAC_GSWS		0x3C	/* Global channel Software Request Status Register */
+#define AT_XDMAC_GSWF		0x40	/* Global Channel Software Flush Request Register */
+#define AT_XDMAC_VERSION	0xFFC	/* XDMAC Version Register */
+
+/* Channel relative registers offsets */
+#define AT_XDMAC_CIE		0x00	/* Channel Interrupt Enable Register */
+#define		AT_XDMAC_CIE_BIE	BIT(0)	/* End of Block Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_LIE	BIT(1)	/* End of Linked List Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_DIE	BIT(2)	/* End of Disable Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_FIE	BIT(3)	/* End of Flush Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_RBEIE	BIT(4)	/* Read Bus Error Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_WBEIE	BIT(5)	/* Write Bus Error Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_ROIE	BIT(6)	/* Request Overflow Interrupt Enable Bit */
+#define AT_XDMAC_CID		0x04	/* Channel Interrupt Disable Register */
+#define		AT_XDMAC_CID_BID	BIT(0)	/* End of Block Interrupt Disable Bit */
+#define		AT_XDMAC_CID_LID	BIT(1)	/* End of Linked List Interrupt Disable Bit */
+#define		AT_XDMAC_CID_DID	BIT(2)	/* End of Disable Interrupt Disable Bit */
+#define		AT_XDMAC_CID_FID	BIT(3)	/* End of Flush Interrupt Disable Bit */
+#define		AT_XDMAC_CID_RBEID	BIT(4)	/* Read Bus Error Interrupt Disable Bit */
+#define		AT_XDMAC_CID_WBEID	BIT(5)	/* Write Bus Error Interrupt Disable Bit */
+#define		AT_XDMAC_CID_ROID	BIT(6)	/* Request Overflow Interrupt Disable Bit */
+#define AT_XDMAC_CIM		0x08	/* Channel Interrupt Mask Register */
+#define		AT_XDMAC_CIM_BIM	BIT(0)	/* End of Block Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_LIM	BIT(1)	/* End of Linked List Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_DIM	BIT(2)	/* End of Disable Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_FIM	BIT(3)	/* End of Flush Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_RBEIM	BIT(4)	/* Read Bus Error Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_WBEIM	BIT(5)	/* Write Bus Error Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_ROIM	BIT(6)	/* Request Overflow Interrupt Mask Bit */
+#define AT_XDMAC_CIS		0x0C	/* Channel Interrupt Status Register */
+#define		AT_XDMAC_CIS_BIS	BIT(0)	/* End of Block Interrupt Status Bit */
+#define		AT_XDMAC_CIS_LIS	BIT(1)	/* End of Linked List Interrupt Status Bit */
+#define		AT_XDMAC_CIS_DIS	BIT(2)	/* End of Disable Interrupt Status Bit */
+#define		AT_XDMAC_CIS_FIS	BIT(3)	/* End of Flush Interrupt Status Bit */
+#define		AT_XDMAC_CIS_RBEIS	BIT(4)	/* Read Bus Error Interrupt Status Bit */
+#define		AT_XDMAC_CIS_WBEIS	BIT(5)	/* Write Bus Error Interrupt Status Bit */
+#define		AT_XDMAC_CIS_ROIS	BIT(6)	/* Request Overflow Interrupt Status Bit */
+#define AT_XDMAC_CSA		0x10	/* Channel Source Address Register */
+#define AT_XDMAC_CDA		0x14	/* Channel Destination Address Register */
+#define AT_XDMAC_CNDA		0x18	/* Channel Next Descriptor Address Register */
+#define		AT_XDMAC_CNDA_NDAIF(i)	((i) & 0x1)			/* Channel x Next Descriptor Interface */
+#define		AT_XDMAC_CNDA_NDA(i)	((i) & 0xfffffffc)		/* Channel x Next Descriptor Address */
+#define AT_XDMAC_CNDC		0x1C	/* Channel Next Descriptor Control Register */
+#define		AT_XDMAC_CNDC_NDE		(0x1 << 0)		/* Channel x Next Descriptor Enable */
+#define		AT_XDMAC_CNDC_NDSUP		(0x1 << 1)		/* Channel x Next Descriptor Source Update */
+#define		AT_XDMAC_CNDC_NDDUP		(0x1 << 2)		/* Channel x Next Descriptor Destination Update */
+#define		AT_XDMAC_CNDC_NDVIEW_NDV0	(0x0 << 3)		/* Channel x Next Descriptor View 0 */
+#define		AT_XDMAC_CNDC_NDVIEW_NDV1	(0x1 << 3)		/* Channel x Next Descriptor View 1 */
+#define		AT_XDMAC_CNDC_NDVIEW_NDV2	(0x2 << 3)		/* Channel x Next Descriptor View 2 */
+#define		AT_XDMAC_CNDC_NDVIEW_NDV3	(0x3 << 3)		/* Channel x Next Descriptor View 3 */
+#define AT_XDMAC_CUBC		0x20	/* Channel Microblock Control Register */
+#define AT_XDMAC_CBC		0x24	/* Channel Block Control Register */
+#define AT_XDMAC_CC		0x28	/* Channel Configuration Register */
+#define		AT_XDMAC_CC_TYPE	(0x1 << 0)	/* Channel Transfer Type */
+#define			AT_XDMAC_CC_TYPE_MEM_TRAN	(0x0 << 0)	/* Memory to Memory Transfer */
+#define			AT_XDMAC_CC_TYPE_PER_TRAN	(0x1 << 0)	/* Peripheral to Memory or Memory to Peripheral Transfer */
+#define		AT_XDMAC_CC_MBSIZE_MASK	(0x3 << 1)
+#define			AT_XDMAC_CC_MBSIZE_SINGLE	(0x0 << 1)
+#define			AT_XDMAC_CC_MBSIZE_FOUR		(0x1 << 1)
+#define			AT_XDMAC_CC_MBSIZE_EIGHT	(0x2 << 1)
+#define			AT_XDMAC_CC_MBSIZE_SIXTEEN	(0x3 << 1)
+#define		AT_XDMAC_CC_DSYNC	(0x1 << 4)	/* Channel Synchronization */
+#define			AT_XDMAC_CC_DSYNC_PER2MEM	(0x0 << 4)
+#define			AT_XDMAC_CC_DSYNC_MEM2PER	(0x1 << 4)
+#define		AT_XDMAC_CC_PROT	(0x1 << 5)	/* Channel Protection */
+#define			AT_XDMAC_CC_PROT_SEC		(0x0 << 5)
+#define			AT_XDMAC_CC_PROT_UNSEC		(0x1 << 5)
+#define		AT_XDMAC_CC_SWREQ	(0x1 << 6)	/* Channel Software Request Trigger */
+#define			AT_XDMAC_CC_SWREQ_HWR_CONNECTED	(0x0 << 6)
+#define			AT_XDMAC_CC_SWREQ_SWR_CONNECTED	(0x1 << 6)
+#define		AT_XDMAC_CC_MEMSET	(0x1 << 7)	/* Channel Fill Block of memory */
+#define			AT_XDMAC_CC_MEMSET_NORMAL_MODE	(0x0 << 7)
+#define			AT_XDMAC_CC_MEMSET_HW_MODE	(0x1 << 7)
+#define		AT_XDMAC_CC_CSIZE(i)	((0x7 & (i)) << 8)	/* Channel Chunk Size */
+#define		AT_XDMAC_CC_DWIDTH_OFFSET	11
+#define		AT_XDMAC_CC_DWIDTH_MASK	(0x3 << AT_XDMAC_CC_DWIDTH_OFFSET)
+#define		AT_XDMAC_CC_DWIDTH(i)	((0x3 & (i)) << AT_XDMAC_CC_DWIDTH_OFFSET)	/* Channel Data Width */
+#define			AT_XDMAC_CC_DWIDTH_BYTE		0x0
+#define			AT_XDMAC_CC_DWIDTH_HALFWORD	0x1
+#define			AT_XDMAC_CC_DWIDTH_WORD		0x2
+#define			AT_XDMAC_CC_DWIDTH_DWORD	0x3
+#define		AT_XDMAC_CC_SIF(i)	((0x1 & (i)) << 13)	/* Channel Source Interface Identifier */
+#define		AT_XDMAC_CC_DIF(i)	((0x1 & (i)) << 14)	/* Channel Destination Interface Identifier */
+#define		AT_XDMAC_CC_SAM_MASK	(0x3 << 16)	/* Channel Source Addressing Mode */
+#define			AT_XDMAC_CC_SAM_FIXED_AM	(0x0 << 16)
+#define			AT_XDMAC_CC_SAM_INCREMENTED_AM	(0x1 << 16)
+#define			AT_XDMAC_CC_SAM_UBS_AM		(0x2 << 16)
+#define			AT_XDMAC_CC_SAM_UBS_DS_AM	(0x3 << 16)
+#define		AT_XDMAC_CC_DAM_MASK	(0x3 << 18)	/* Channel Destination Addressing Mode */
+#define			AT_XDMAC_CC_DAM_FIXED_AM	(0x0 << 18)
+#define			AT_XDMAC_CC_DAM_INCREMENTED_AM	(0x1 << 18)
+#define			AT_XDMAC_CC_DAM_UBS_AM		(0x2 << 18)
+#define			AT_XDMAC_CC_DAM_UBS_DS_AM	(0x3 << 18)
+#define		AT_XDMAC_CC_INITD	(0x1 << 21)	/* Channel Initialization Terminated (read only) */
+#define			AT_XDMAC_CC_INITD_TERMINATED	(0x0 << 21)
+#define			AT_XDMAC_CC_INITD_IN_PROGRESS	(0x1 << 21)
+#define		AT_XDMAC_CC_RDIP	(0x1 << 22)	/* Read in Progress (read only) */
+#define			AT_XDMAC_CC_RDIP_DONE		(0x0 << 22)
+#define			AT_XDMAC_CC_RDIP_IN_PROGRESS	(0x1 << 22)
+#define		AT_XDMAC_CC_WRIP	(0x1 << 23)	/* Write in Progress (read only) */
+#define			AT_XDMAC_CC_WRIP_DONE		(0x0 << 23)
+#define			AT_XDMAC_CC_WRIP_IN_PROGRESS	(0x1 << 23)
+#define		AT_XDMAC_CC_PERID(i)	((0x7f & (i)) << 24)	/* Channel Peripheral Identifier */
+#define AT_XDMAC_CDS_MSP	0x2C	/* Channel Data Stride Memory Set Pattern */
+#define AT_XDMAC_CSUS		0x30	/* Channel Source Microblock Stride */
+#define AT_XDMAC_CDUS		0x34	/* Channel Destination Microblock Stride */
+
+#define AT_XDMAC_CHAN_REG_BASE	0x50	/* Channel registers base address */
+
+/* Microblock control members */
+#define AT_XDMAC_MBR_UBC_UBLEN_MAX	0xFFFFFFUL	/* Maximum Microblock Length */
+#define AT_XDMAC_MBR_UBC_NDE		(0x1 << 24)	/* Next Descriptor Enable */
+#define AT_XDMAC_MBR_UBC_NSEN		(0x1 << 25)	/* Next Descriptor Source Update */
+#define AT_XDMAC_MBR_UBC_NDEN		(0x1 << 26)	/* Next Descriptor Destination Update */
+#define AT_XDMAC_MBR_UBC_NDV0		(0x0 << 27)	/* Next Descriptor View 0 */
+#define AT_XDMAC_MBR_UBC_NDV1		(0x1 << 27)	/* Next Descriptor View 1 */
+#define AT_XDMAC_MBR_UBC_NDV2		(0x2 << 27)	/* Next Descriptor View 2 */
+#define AT_XDMAC_MBR_UBC_NDV3		(0x3 << 27)	/* Next Descriptor View 3 */
+
+#define AT_XDMAC_MAX_CHAN	0x20
+
+enum atc_status {
+	AT_XDMAC_CHAN_IS_CYCLIC = 0,
+	AT_XDMAC_CHAN_IS_PAUSED,
+};
+
+/* ----- Channels ----- */
+struct at_xdmac_chan {
+	struct dma_chan			chan;
+	void __iomem			*ch_regs;
+	u32				mask;		/* Channel Mask */
+	u32				cfg[3];		/* Channel Configuration Register */
+	#define	AT_XDMAC_CUR_CFG	0		/* Current channel conf */
+	#define	AT_XDMAC_DEV_TO_MEM_CFG	1		/* Predefined dev to mem channel conf */
+	#define	AT_XDMAC_MEM_TO_DEV_CFG	2		/* Predefined mem to dev channel conf */
+	u8				perid;		/* Peripheral ID */
+	u8				perif;		/* Peripheral Interface */
+	u8				memif;		/* Memory Interface */
+	u32				per_src_addr;
+	u32				per_dst_addr;
+	u32				save_cim;
+	u32				save_cnda;
+	u32				save_cndc;
+	unsigned long			status;
+	struct tasklet_struct		tasklet;
+
+	spinlock_t			lock;
+
+	struct list_head		xfers_list;
+	struct list_head		free_descs_list;
+};
+
+
+/* ----- Controller ----- */
+struct at_xdmac {
+	struct dma_device	dma;
+	void __iomem		*regs;
+	int			irq;
+	struct clk		*clk;
+	u32			save_gim;
+	u32			save_gs;
+	struct dma_pool		*at_xdmac_desc_pool;
+	struct at_xdmac_chan	chan[0];
+};
+
+
+/* ----- Descriptors ----- */
+
+/* Linked List Descriptor */
+struct at_xdmac_lld {
+	dma_addr_t	mbr_nda;	/* Next Descriptor Member */
+	u32		mbr_ubc;	/* Microblock Control Member */
+	dma_addr_t	mbr_sa;		/* Source Address Member */
+	dma_addr_t	mbr_da;		/* Destination Address Member */
+	u32		mbr_cfg;	/* Configuration Register */
+};
+
+
+struct at_xdmac_desc {
+	struct at_xdmac_lld		lld;
+	enum dma_transfer_direction	direction;
+	struct dma_async_tx_descriptor	tx_dma_desc;
+	struct list_head		desc_node;
+	/* Following members are only used by the first descriptor */
+	bool				active_xfer;
+	unsigned int			xfer_size;
+	struct list_head		descs_list;
+	struct list_head		xfer_node;
+};
+
+static inline void __iomem *at_xdmac_chan_reg_base(struct at_xdmac *atxdmac, unsigned int chan_nb)
+{
+	return atxdmac->regs + (AT_XDMAC_CHAN_REG_BASE + chan_nb * 0x40);
+}
+
+#define at_xdmac_read(atxdmac, reg) readl_relaxed((atxdmac)->regs + (reg))
+#define at_xdmac_write(atxdmac, reg, value) \
+	writel_relaxed((value), (atxdmac)->regs + (reg))
+
+#define at_xdmac_chan_read(atchan, reg) readl_relaxed((atchan)->ch_regs + (reg))
+#define at_xdmac_chan_write(atchan, reg, value) writel_relaxed((value), (atchan)->ch_regs + (reg))
+
+static inline struct at_xdmac_chan *to_at_xdmac_chan(struct dma_chan *dchan)
+{
+	return container_of(dchan, struct at_xdmac_chan, chan);
+}
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+
+static inline struct at_xdmac *to_at_xdmac(struct dma_device *ddev)
+{
+	return container_of(ddev, struct at_xdmac, dma);
+}
+
+static inline struct at_xdmac_desc *txd_to_at_desc(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct at_xdmac_desc, tx_dma_desc);
+}
+
+static inline int at_xdmac_chan_is_cyclic(struct at_xdmac_chan *atchan)
+{
+	return test_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status);
+}
+
+static inline int at_xdmac_chan_is_paused(struct at_xdmac_chan *atchan)
+{
+	return test_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
+}
+
+static inline int at_xdmac_csize(u32 maxburst)
+{
+	int csize;
+
+	csize = ffs(maxburst) - 1;
+	if (csize > 4)
+		csize = -EINVAL;
+
+	return csize;
+}
+
+static inline u8 at_xdmac_get_dwidth(u32 cfg)
+{
+	return (cfg & AT_XDMAC_CC_DWIDTH_MASK) >> AT_XDMAC_CC_DWIDTH_OFFSET;
+}
+
+static unsigned int init_nr_desc_per_channel = 64;
+module_param(init_nr_desc_per_channel, uint, 0644);
+MODULE_PARM_DESC(init_nr_desc_per_channel,
+		 "initial descriptors per channel (default: 64)");
+
+
+static bool at_xdmac_chan_is_enabled(struct at_xdmac_chan *atchan)
+{
+	struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device);
+
+	/* AT_XDMAC_GS is a global register, read it from the global base. */
+	return at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask;
+}
+
+static void at_xdmac_off(struct at_xdmac *atxdmac)
+{
+	at_xdmac_write(atxdmac, AT_XDMAC_GD, -1L);
+
+	/* Wait until all channels are disabled. */
+	while (at_xdmac_read(atxdmac, AT_XDMAC_GS))
+		cpu_relax();
+
+	at_xdmac_write(atxdmac, AT_XDMAC_GID, -1L);
+}
+
+/* Call with lock held. */
+static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan,
+				struct at_xdmac_desc *first)
+{
+	struct at_xdmac	*atxdmac = to_at_xdmac(atchan->chan.device);
+	u32		reg;
+
+	dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, first);
+
+	if (at_xdmac_chan_is_enabled(atchan))
+		return;
+
+	/* Set transfer as active to not try to start it again. */
+	first->active_xfer = true;
+
+	/* Tell xdmac where to get the first descriptor. */
+	reg = AT_XDMAC_CNDA_NDA(first->tx_dma_desc.phys)
+	      | AT_XDMAC_CNDA_NDAIF(atchan->memif);
+	at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, reg);
+
+	/*
+	 * When doing a memory-to-memory transfer we need to use next
+	 * descriptor view 2, since some fields of the configuration register
+	 * depend on the transfer size and the src/dest addresses.
+	 */
+	if (is_slave_direction(first->direction)) {
+		reg = AT_XDMAC_CNDC_NDVIEW_NDV1;
+		if (first->direction == DMA_MEM_TO_DEV)
+			atchan->cfg[AT_XDMAC_CUR_CFG] =
+				atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG];
+		else
+			atchan->cfg[AT_XDMAC_CUR_CFG] =
+				atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG];
+		at_xdmac_chan_write(atchan, AT_XDMAC_CC,
+				    atchan->cfg[AT_XDMAC_CUR_CFG]);
+	} else {
+		/*
+		 * No need to write the AT_XDMAC_CC reg; it will be done when
+		 * the descriptor is fetched.
+		 */
+		reg = AT_XDMAC_CNDC_NDVIEW_NDV2;
+	}
+
+	reg |= AT_XDMAC_CNDC_NDDUP
+	       | AT_XDMAC_CNDC_NDSUP
+	       | AT_XDMAC_CNDC_NDE;
+	at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, reg);
+
+	dev_vdbg(chan2dev(&atchan->chan),
+		 "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
+		 __func__, at_xdmac_chan_read(atchan, AT_XDMAC_CC),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
+
+	at_xdmac_chan_write(atchan, AT_XDMAC_CID, 0xffffffff);
+	reg = AT_XDMAC_CIE_RBEIE | AT_XDMAC_CIE_WBEIE | AT_XDMAC_CIE_ROIE;
+	/*
+	 * There is no end of list when doing cyclic DMA, so we need to
+	 * get an interrupt after each period.
+	 */
+	if (at_xdmac_chan_is_cyclic(atchan))
+		at_xdmac_chan_write(atchan, AT_XDMAC_CIE,
+				    reg | AT_XDMAC_CIE_BIE);
+	else
+		at_xdmac_chan_write(atchan, AT_XDMAC_CIE,
+				    reg | AT_XDMAC_CIE_LIE);
+	at_xdmac_write(atxdmac, AT_XDMAC_GIE, atchan->mask);
+	dev_vdbg(chan2dev(&atchan->chan),
+		 "%s: enable channel (0x%08x)\n", __func__, atchan->mask);
+	wmb();
+	at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask);
+
+	dev_vdbg(chan2dev(&atchan->chan),
+		 "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
+		 __func__, at_xdmac_chan_read(atchan, AT_XDMAC_CC),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
+
+}
+
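+/*
+ * Assign a cookie to the descriptor and queue it on the channel; if
+ * it is the only transfer queued, start it right away.
+ */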
+static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct at_xdmac_desc	*desc = txd_to_at_desc(tx);
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(tx->chan);
+	dma_cookie_t		cookie;
+
+	spin_lock_bh(&atchan->lock);
+	cookie = dma_cookie_assign(tx);
+
+	dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n",
+		 __func__, atchan, desc);
+	list_add_tail(&desc->xfer_node, &atchan->xfers_list);
+	if (list_is_singular(&atchan->xfers_list))
+		at_xdmac_start_xfer(atchan, desc);
+
+	spin_unlock_bh(&atchan->lock);
+	return cookie;
+}
+
+static struct at_xdmac_desc *at_xdmac_alloc_desc(struct dma_chan *chan,
+						 gfp_t gfp_flags)
+{
+	struct at_xdmac_desc	*desc;
+	struct at_xdmac		*atxdmac = to_at_xdmac(chan->device);
+	dma_addr_t		phys;
+
+	desc = dma_pool_alloc(atxdmac->at_xdmac_desc_pool, gfp_flags, &phys);
+	if (desc) {
+		memset(desc, 0, sizeof(*desc));
+		INIT_LIST_HEAD(&desc->descs_list);
+		dma_async_tx_descriptor_init(&desc->tx_dma_desc, chan);
+		desc->tx_dma_desc.tx_submit = at_xdmac_tx_submit;
+		desc->tx_dma_desc.phys = phys;
+	}
+
+	return desc;
+}
+
+/* Call must be protected by lock. */
+static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan)
+{
+	struct at_xdmac_desc *desc;
+
+	if (list_empty(&atchan->free_descs_list)) {
+		desc = at_xdmac_alloc_desc(&atchan->chan, GFP_NOWAIT);
+	} else {
+		desc = list_first_entry(&atchan->free_descs_list,
+					struct at_xdmac_desc, desc_node);
+		list_del(&desc->desc_node);
+		desc->active_xfer = false;
+	}
+
+	return desc;
+}
+
+static struct dma_chan *at_xdmac_xlate(struct of_phandle_args *dma_spec,
+				       struct of_dma *of_dma)
+{
+	struct at_xdmac		*atxdmac = of_dma->of_dma_data;
+	struct at_xdmac_chan	*atchan;
+	struct dma_chan		*chan;
+	struct device		*dev = atxdmac->dma.dev;
+
+	if (dma_spec->args_count != 1) {
+		dev_err(dev, "dma phandle args: bad number of args\n");
+		return NULL;
+	}
+
+	chan = dma_get_any_slave_channel(&atxdmac->dma);
+	if (!chan) {
+		dev_err(dev, "can't get a dma channel\n");
+		return NULL;
+	}
+
+	atchan = to_at_xdmac_chan(chan);
+	atchan->memif = AT91_XDMAC_DT_GET_MEM_IF(dma_spec->args[0]);
+	atchan->perif = AT91_XDMAC_DT_GET_PER_IF(dma_spec->args[0]);
+	atchan->perid = AT91_XDMAC_DT_GET_PERID(dma_spec->args[0]);
+	dev_dbg(dev, "chan dt cfg: memif=%u perif=%u perid=%u\n",
+		 atchan->memif, atchan->perif, atchan->perid);
+
+	return chan;
+}
+
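+/*
+ * Precompute the dev-to-mem and mem-to-dev channel configuration
+ * words from the slave config (addressing modes, interfaces, chunk
+ * size from maxburst, data width from the address width) and remember
+ * the peripheral addresses used when building descriptors.
+ */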
+static int at_xdmac_set_slave_config(struct dma_chan *chan,
+				      struct dma_slave_config *sconfig)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	u8 dwidth;
+	int csize;
+
+	atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG] =
+		AT91_XDMAC_DT_PERID(atchan->perid)
+		| AT_XDMAC_CC_DAM_INCREMENTED_AM
+		| AT_XDMAC_CC_SAM_FIXED_AM
+		| AT_XDMAC_CC_DIF(atchan->memif)
+		| AT_XDMAC_CC_SIF(atchan->perif)
+		| AT_XDMAC_CC_SWREQ_HWR_CONNECTED
+		| AT_XDMAC_CC_DSYNC_PER2MEM
+		| AT_XDMAC_CC_MBSIZE_SIXTEEN
+		| AT_XDMAC_CC_TYPE_PER_TRAN;
+	csize = at_xdmac_csize(sconfig->src_maxburst);
+	if (csize < 0) {
+		dev_err(chan2dev(chan), "invalid src maxburst value\n");
+		return -EINVAL;
+	}
+	atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG] |= AT_XDMAC_CC_CSIZE(csize);
+	dwidth = ffs(sconfig->src_addr_width) - 1;
+	atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG] |= AT_XDMAC_CC_DWIDTH(dwidth);
+
+
+	atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG] =
+		AT91_XDMAC_DT_PERID(atchan->perid)
+		| AT_XDMAC_CC_DAM_FIXED_AM
+		| AT_XDMAC_CC_SAM_INCREMENTED_AM
+		| AT_XDMAC_CC_DIF(atchan->perif)
+		| AT_XDMAC_CC_SIF(atchan->memif)
+		| AT_XDMAC_CC_SWREQ_HWR_CONNECTED
+		| AT_XDMAC_CC_DSYNC_MEM2PER
+		| AT_XDMAC_CC_MBSIZE_SIXTEEN
+		| AT_XDMAC_CC_TYPE_PER_TRAN;
+	csize = at_xdmac_csize(sconfig->dst_maxburst);
+	if (csize < 0) {
+		dev_err(chan2dev(chan), "invalid dst maxburst value\n");
+		return -EINVAL;
+	}
+	atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG] |= AT_XDMAC_CC_CSIZE(csize);
+	dwidth = ffs(sconfig->dst_addr_width) - 1;
+	atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG] |= AT_XDMAC_CC_DWIDTH(dwidth);
+
+	/* Src and dst addr are needed to configure the linked list descriptor. */
+	atchan->per_src_addr = sconfig->src_addr;
+	atchan->per_dst_addr = sconfig->dst_addr;
+
+	dev_dbg(chan2dev(chan),
+		"%s: cfg[dev2mem]=0x%08x, cfg[mem2dev]=0x%08x, per_src_addr=0x%08x, per_dst_addr=0x%08x\n",
+		__func__, atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG],
+		atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG],
+		atchan->per_src_addr, atchan->per_dst_addr);
+
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		       unsigned int sg_len, enum dma_transfer_direction direction,
+		       unsigned long flags, void *context)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac_desc	*first = NULL, *prev = NULL;
+	struct scatterlist	*sg;
+	int			i;
+	u32			cfg;
+	unsigned int		xfer_size = 0;
+
+	if (!sgl)
+		return NULL;
+
+	if (!is_slave_direction(direction)) {
+		dev_err(chan2dev(chan), "invalid DMA direction\n");
+		return NULL;
+	}
+
+	dev_dbg(chan2dev(chan), "%s: sg_len=%d, dir=%s, flags=0x%lx\n",
+		 __func__, sg_len,
+		 direction == DMA_MEM_TO_DEV ? "to device" : "from device",
+		 flags);
+
+	/* Protect the channel config that at_xdmac_set_slave_config() can modify. */
+	spin_lock_bh(&atchan->lock);
+
+	/* Prepare descriptors. */
+	for_each_sg(sgl, sg, sg_len, i) {
+		struct at_xdmac_desc	*desc = NULL;
+		u32			len, mem;
+
+		len = sg_dma_len(sg);
+		mem = sg_dma_address(sg);
+		if (unlikely(!len)) {
+			dev_err(chan2dev(chan), "sg data length is zero\n");
+			spin_unlock_bh(&atchan->lock);
+			return NULL;
+		}
+		dev_dbg(chan2dev(chan), "%s: * sg%d len=%u, mem=0x%08x\n",
+			 __func__, i, len, mem);
+
+		desc = at_xdmac_get_desc(atchan);
+		if (!desc) {
+			dev_err(chan2dev(chan), "can't get descriptor\n");
+			if (first)
+				list_splice_init(&first->descs_list, &atchan->free_descs_list);
+			spin_unlock_bh(&atchan->lock);
+			return NULL;
+		}
+
+		/* Linked list descriptor setup. */
+		if (direction == DMA_DEV_TO_MEM) {
+			desc->lld.mbr_sa = atchan->per_src_addr;
+			desc->lld.mbr_da = mem;
+			cfg = atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG];
+		} else {
+			desc->lld.mbr_sa = mem;
+			desc->lld.mbr_da = atchan->per_dst_addr;
+			cfg = atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG];
+		}
+		desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1		/* next descriptor view */
+			| AT_XDMAC_MBR_UBC_NDEN				/* next descriptor dst parameter update */
+			| AT_XDMAC_MBR_UBC_NSEN				/* next descriptor src parameter update */
+			| (i == sg_len - 1 ? 0 : AT_XDMAC_MBR_UBC_NDE)	/* descriptor fetch */
+			| len / (1 << at_xdmac_get_dwidth(cfg));	/* microblock length */
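+		/*
+		 * Illustrative example: with a word (4-byte) data width,
+		 * at_xdmac_get_dwidth(cfg) is 2, so a 64-byte sg entry gives
+		 * a microblock length (UBLEN) of 64 >> 2 = 16 data items.
+		 */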
+		dev_dbg(chan2dev(chan),
+			 "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
+			 __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc);
+
+		/* Chain lld. */
+		if (prev) {
+			prev->lld.mbr_nda = desc->tx_dma_desc.phys;
+			dev_dbg(chan2dev(chan),
+				 "%s: chain lld: prev=0x%p, mbr_nda=%pad\n",
+				 __func__, prev, &prev->lld.mbr_nda);
+		}
+
+		prev = desc;
+		if (!first)
+			first = desc;
+
+		dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+			 __func__, desc, first);
+		list_add_tail(&desc->desc_node, &first->descs_list);
+		xfer_size += len;
+	}
+
+	spin_unlock_bh(&atchan->lock);
+
+	first->tx_dma_desc.flags = flags;
+	first->xfer_size = xfer_size;
+	first->direction = direction;
+
+	return &first->tx_dma_desc;
+}
+
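+/*
+ * Illustrative example of the list built above: a DMA_MEM_TO_DEV request with
+ * a two-entry scatterlist yields two chained llds.  The first has NDE set so
+ * the controller fetches the second automatically; the second has NDE cleared
+ * so the transfer stops there and the end-of-linked-list interrupt can fire.
+ */
+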
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
+			 size_t buf_len, size_t period_len,
+			 enum dma_transfer_direction direction,
+			 unsigned long flags)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac_desc	*first = NULL, *prev = NULL;
+	unsigned int		periods = buf_len / period_len;
+	int			i;
+	u32			cfg;
+
+	dev_dbg(chan2dev(chan), "%s: buf_addr=%pad, buf_len=%zd, period_len=%zd, dir=%s, flags=0x%lx\n",
+		__func__, &buf_addr, buf_len, period_len,
+		direction == DMA_MEM_TO_DEV ? "mem2per" : "per2mem", flags);
+
+	if (!is_slave_direction(direction)) {
+		dev_err(chan2dev(chan), "invalid DMA direction\n");
+		return NULL;
+	}
+
+	if (test_and_set_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status)) {
+		dev_err(chan2dev(chan), "channel currently used\n");
+		return NULL;
+	}
+
+	for (i = 0; i < periods; i++) {
+		struct at_xdmac_desc	*desc = NULL;
+
+		spin_lock_bh(&atchan->lock);
+		desc = at_xdmac_get_desc(atchan);
+		if (!desc) {
+			dev_err(chan2dev(chan), "can't get descriptor\n");
+			if (first)
+				list_splice_init(&first->descs_list, &atchan->free_descs_list);
+			spin_unlock_bh(&atchan->lock);
+			return NULL;
+		}
+		spin_unlock_bh(&atchan->lock);
+		dev_dbg(chan2dev(chan),
+			"%s: desc=0x%p, tx_dma_desc.phys=%pad\n",
+			__func__, desc, &desc->tx_dma_desc.phys);
+
+		if (direction == DMA_DEV_TO_MEM) {
+			desc->lld.mbr_sa = atchan->per_src_addr;
+			desc->lld.mbr_da = buf_addr + i * period_len;
+			cfg = atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG];
+		} else {
+			desc->lld.mbr_sa = buf_addr + i * period_len;
+			desc->lld.mbr_da = atchan->per_dst_addr;
+			cfg = atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG];
+		}
+		desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1
+			| AT_XDMAC_MBR_UBC_NDEN
+			| AT_XDMAC_MBR_UBC_NSEN
+			| AT_XDMAC_MBR_UBC_NDE
+			| period_len >> at_xdmac_get_dwidth(cfg);
+
+		dev_dbg(chan2dev(chan),
+			 "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
+			 __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc);
+
+		/* Chain lld. */
+		if (prev) {
+			prev->lld.mbr_nda = desc->tx_dma_desc.phys;
+			dev_dbg(chan2dev(chan),
+				 "%s: chain lld: prev=0x%p, mbr_nda=%pad\n",
+				 __func__, prev, &prev->lld.mbr_nda);
+		}
+
+		prev = desc;
+		if (!first)
+			first = desc;
+
+		dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+			 __func__, desc, first);
+		list_add_tail(&desc->desc_node, &first->descs_list);
+	}
+
+	prev->lld.mbr_nda = first->tx_dma_desc.phys;
+	dev_dbg(chan2dev(chan),
+		"%s: chain lld: prev=0x%p, mbr_nda=%pad\n",
+		__func__, prev, &prev->lld.mbr_nda);
+	first->tx_dma_desc.flags = flags;
+	first->xfer_size = buf_len;
+	first->direction = direction;
+
+	return &first->tx_dma_desc;
+}
+
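+/*
+ * Illustrative example of the cyclic layout built above: a 4 KiB ring buffer
+ * with 1 KiB periods produces four linked descriptors, each with NDE set and
+ * the last one's mbr_nda pointing back to the first, so the controller loops
+ * over the buffer until the channel is terminated.
+ */
+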
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+			 size_t len, unsigned long flags)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac_desc	*first = NULL, *prev = NULL;
+	size_t			remaining_size = len, xfer_size = 0, ublen;
+	dma_addr_t		src_addr = src, dst_addr = dest;
+	u32			dwidth;
+	/*
+	 * WARNING: The memcpy direction is unknown, which means we can't
+	 * dynamically set the source and destination interfaces, so the same
+	 * one has to be used for both. Only interface 0 allows EBI access.
+	 * Fortunately DDR is accessible through both ports (at least on
+	 * SAMA5D4x), so using the same interface for source and destination
+	 * works around the unknown direction.
+	 */
+	u32			chan_cc = AT_XDMAC_CC_DAM_INCREMENTED_AM
+					| AT_XDMAC_CC_SAM_INCREMENTED_AM
+					| AT_XDMAC_CC_DIF(0)
+					| AT_XDMAC_CC_SIF(0)
+					| AT_XDMAC_CC_MBSIZE_SIXTEEN
+					| AT_XDMAC_CC_TYPE_MEM_TRAN;
+
+	dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, len=%zd, flags=0x%lx\n",
+		__func__, &src, &dest, len, flags);
+
+	if (unlikely(!len))
+		return NULL;
+
+	/*
+	 * Check address alignment to select the widest data width we can use.
+	 * Some XDMAC implementations don't provide dword transfers; in that
+	 * case selecting dword behaves the same as selecting word transfers.
+	 */
+	if (!((src_addr | dst_addr) & 7)) {
+		dwidth = AT_XDMAC_CC_DWIDTH_DWORD;
+		dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__);
+	} else if (!((src_addr | dst_addr) & 3)) {
+		dwidth = AT_XDMAC_CC_DWIDTH_WORD;
+		dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__);
+	} else if (!((src_addr | dst_addr) & 1)) {
+		dwidth = AT_XDMAC_CC_DWIDTH_HALFWORD;
+		dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__);
+	} else {
+		dwidth = AT_XDMAC_CC_DWIDTH_BYTE;
+		dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__);
+	}
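+	/*
+	 * Illustrative example: src = 0x20010000 and dst = 0x30000008 are both
+	 * 8-byte aligned ((src | dst) & 7 == 0), so dword width is selected;
+	 * with dst = 0x30000006 the OR is only 2-byte aligned and half word
+	 * width is used instead.
+	 */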
+
+	/* Prepare descriptors. */
+	while (remaining_size) {
+		struct at_xdmac_desc	*desc = NULL;
+
+		dev_dbg(chan2dev(chan), "%s: remaining_size=%zu\n", __func__, remaining_size);
+
+		spin_lock_bh(&atchan->lock);
+		desc = at_xdmac_get_desc(atchan);
+		spin_unlock_bh(&atchan->lock);
+		if (!desc) {
+			dev_err(chan2dev(chan), "can't get descriptor\n");
+			if (first)
+				list_splice_init(&first->descs_list, &atchan->free_descs_list);
+			return NULL;
+		}
+
+		/* Update src and dest addresses. */
+		src_addr += xfer_size;
+		dst_addr += xfer_size;
+
+		if (remaining_size >= AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)
+			xfer_size = AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth;
+		else
+			xfer_size = remaining_size;
+
+		dev_dbg(chan2dev(chan), "%s: xfer_size=%zu\n", __func__, xfer_size);
+
+		/* Check remaining length and change data width if needed. */
+		if (!((src_addr | dst_addr | xfer_size) & 7)) {
+			dwidth = AT_XDMAC_CC_DWIDTH_DWORD;
+			dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__);
+		} else if (!((src_addr | dst_addr | xfer_size) & 3)) {
+			dwidth = AT_XDMAC_CC_DWIDTH_WORD;
+			dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__);
+		} else if (!((src_addr | dst_addr | xfer_size) & 1)) {
+			dwidth = AT_XDMAC_CC_DWIDTH_HALFWORD;
+			dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__);
+		} else {
+			dwidth = AT_XDMAC_CC_DWIDTH_BYTE;
+			dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__);
+		}
+		chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);
+
+		ublen = xfer_size >> dwidth;
+		remaining_size -= xfer_size;
+
+		desc->lld.mbr_sa = src_addr;
+		desc->lld.mbr_da = dst_addr;
+		desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2
+			| AT_XDMAC_MBR_UBC_NDEN
+			| AT_XDMAC_MBR_UBC_NSEN
+			| (remaining_size ? AT_XDMAC_MBR_UBC_NDE : 0)
+			| ublen;
+		desc->lld.mbr_cfg = chan_cc;
+
+		dev_dbg(chan2dev(chan),
+			 "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
+			 __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc, desc->lld.mbr_cfg);
+
+		/* Chain lld. */
+		if (prev) {
+			prev->lld.mbr_nda = desc->tx_dma_desc.phys;
+			dev_dbg(chan2dev(chan),
+				 "%s: chain lld: prev=0x%p, mbr_nda=0x%08x\n",
+				 __func__, prev, prev->lld.mbr_nda);
+		}
+
+		prev = desc;
+		if (!first)
+			first = desc;
+
+		dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+			 __func__, desc, first);
+		list_add_tail(&desc->desc_node, &first->descs_list);
+	}
+
+	first->tx_dma_desc.flags = flags;
+	first->xfer_size = len;
+
+	return &first->tx_dma_desc;
+}
+
+static enum dma_status
+at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+		struct dma_tx_state *txstate)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
+	struct at_xdmac_desc	*desc, *_desc;
+	struct list_head	*descs_list;
+	enum dma_status		ret;
+	int			residue;
+	u32			cur_nda, mask, value;
+	u8			dwidth = at_xdmac_get_dwidth(atchan->cfg[AT_XDMAC_CUR_CFG]);
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	if (!txstate)
+		return ret;
+
+	spin_lock_bh(&atchan->lock);
+
+	desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, xfer_node);
+
+	/*
+	 * If the transfer has not been started yet, there is no need to
+	 * compute the residue: it is simply the transfer length.
+	 */
+	if (!desc->active_xfer) {
+		dma_set_residue(txstate, desc->xfer_size);
+		spin_unlock_bh(&atchan->lock);
+		return ret;
+	}
+
+	residue = desc->xfer_size;
+	/*
+	 * Flush FIFO: only relevant when the transfer is source peripheral
+	 * synchronized.
+	 */
+	mask = AT_XDMAC_CC_TYPE | AT_XDMAC_CC_DSYNC;
+	value = AT_XDMAC_CC_TYPE_PER_TRAN | AT_XDMAC_CC_DSYNC_PER2MEM;
+	if ((atchan->cfg[AT_XDMAC_CUR_CFG] & mask) == value) {
+		at_xdmac_write(atxdmac, AT_XDMAC_GSWF, atchan->mask);
+		while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS))
+			cpu_relax();
+	}
+
+	cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+	/*
+	 * Subtract the size of all microblocks already transferred, including
+	 * the current one, then add back the size still to be transferred in
+	 * the current microblock.
+	 */
+	descs_list = &desc->descs_list;
+	list_for_each_entry_safe(desc, _desc, descs_list, desc_node) {
+		residue -= (desc->lld.mbr_ubc & 0xffffff) << dwidth;
+		if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda)
+			break;
+	}
+	residue += at_xdmac_chan_read(atchan, AT_XDMAC_CUBC) << dwidth;
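+	/*
+	 * Put differently (informal sketch):
+	 *   residue = xfer_size
+	 *           - sum of (UBLEN << dwidth) for every lld up to and
+	 *             including the one currently being processed
+	 *           + (CUBC << dwidth), the part of that microblock which has
+	 *             not been transferred yet.
+	 */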
+
+	spin_unlock_bh(&atchan->lock);
+
+	dma_set_residue(txstate, residue);
+
+	dev_dbg(chan2dev(chan),
+		 "%s: desc=0x%p, tx_dma_desc.phys=%pad, tx_status=%d, cookie=%d, residue=%d\n",
+		 __func__, desc, &desc->tx_dma_desc.phys, ret, cookie, residue);
+
+	return ret;
+}
+
+/* Call must be protected by lock. */
+static void at_xdmac_remove_xfer(struct at_xdmac_chan *atchan,
+				    struct at_xdmac_desc *desc)
+{
+	dev_dbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
+
+	/*
+	 * Remove the transfer from the transfer list, then move the transfer
+	 * descriptors into the free descriptors list.
+	 */
+	list_del(&desc->xfer_node);
+	list_splice_init(&desc->descs_list, &atchan->free_descs_list);
+}
+
+static void at_xdmac_advance_work(struct at_xdmac_chan *atchan)
+{
+	struct at_xdmac_desc	*desc;
+
+	spin_lock_bh(&atchan->lock);
+
+	/*
+	 * If the channel is enabled, do nothing: advance_work will be
+	 * triggered again after the interrupt.
+	 */
+	if (!at_xdmac_chan_is_enabled(atchan) && !list_empty(&atchan->xfers_list)) {
+		desc = list_first_entry(&atchan->xfers_list,
+					struct at_xdmac_desc,
+					xfer_node);
+		dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
+		if (!desc->active_xfer)
+			at_xdmac_start_xfer(atchan, desc);
+	}
+
+	spin_unlock_bh(&atchan->lock);
+}
+
+static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan)
+{
+	struct at_xdmac_desc		*desc;
+	struct dma_async_tx_descriptor	*txd;
+
+	desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, xfer_node);
+	txd = &desc->tx_dma_desc;
+
+	if (txd->callback && (txd->flags & DMA_PREP_INTERRUPT))
+		txd->callback(txd->callback_param);
+}
+
+static void at_xdmac_tasklet(unsigned long data)
+{
+	struct at_xdmac_chan	*atchan = (struct at_xdmac_chan *)data;
+	struct at_xdmac_desc	*desc;
+	u32			error_mask;
+
+	dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08lx\n",
+		 __func__, atchan->status);
+
+	error_mask = AT_XDMAC_CIS_RBEIS
+		     | AT_XDMAC_CIS_WBEIS
+		     | AT_XDMAC_CIS_ROIS;
+
+	if (at_xdmac_chan_is_cyclic(atchan)) {
+		at_xdmac_handle_cyclic(atchan);
+	} else if ((atchan->status & AT_XDMAC_CIS_LIS)
+		   || (atchan->status & error_mask)) {
+		struct dma_async_tx_descriptor  *txd;
+
+		if (atchan->status & AT_XDMAC_CIS_RBEIS)
+			dev_err(chan2dev(&atchan->chan), "read bus error!!!");
+		if (atchan->status & AT_XDMAC_CIS_WBEIS)
+			dev_err(chan2dev(&atchan->chan), "write bus error!!!");
+		if (atchan->status & AT_XDMAC_CIS_ROIS)
+			dev_err(chan2dev(&atchan->chan), "request overflow error!!!");
+
+		spin_lock_bh(&atchan->lock);
+		desc = list_first_entry(&atchan->xfers_list,
+					struct at_xdmac_desc,
+					xfer_node);
+		dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
+		BUG_ON(!desc->active_xfer);
+
+		txd = &desc->tx_dma_desc;
+
+		at_xdmac_remove_xfer(atchan, desc);
+		spin_unlock_bh(&atchan->lock);
+
+		if (!at_xdmac_chan_is_cyclic(atchan)) {
+			dma_cookie_complete(txd);
+			if (txd->callback && (txd->flags & DMA_PREP_INTERRUPT))
+				txd->callback(txd->callback_param);
+		}
+
+		dma_run_dependencies(txd);
+
+		at_xdmac_advance_work(atchan);
+	}
+}
+
+static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id)
+{
+	struct at_xdmac		*atxdmac = (struct at_xdmac *)dev_id;
+	struct at_xdmac_chan	*atchan;
+	u32			imr, status, pending;
+	u32			chan_imr, chan_status;
+	int			i, ret = IRQ_NONE;
+
+	do {
+		imr = at_xdmac_read(atxdmac, AT_XDMAC_GIM);
+		status = at_xdmac_read(atxdmac, AT_XDMAC_GIS);
+		pending = status & imr;
+
+		dev_vdbg(atxdmac->dma.dev,
+			 "%s: status=0x%08x, imr=0x%08x, pending=0x%08x\n",
+			 __func__, status, imr, pending);
+
+		if (!pending)
+			break;
+
+		/* We have to find which channel has generated the interrupt. */
+		for (i = 0; i < atxdmac->dma.chancnt; i++) {
+			if (!((1 << i) & pending))
+				continue;
+
+			atchan = &atxdmac->chan[i];
+			chan_imr = at_xdmac_chan_read(atchan, AT_XDMAC_CIM);
+			chan_status = at_xdmac_chan_read(atchan, AT_XDMAC_CIS);
+			atchan->status = chan_status & chan_imr;
+			dev_vdbg(atxdmac->dma.dev,
+				 "%s: chan%d: imr=0x%x, status=0x%x\n",
+				 __func__, i, chan_imr, chan_status);
+			dev_vdbg(chan2dev(&atchan->chan),
+				 "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
+				 __func__,
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CC),
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
+
+			if (atchan->status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS))
+				at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
+
+			tasklet_schedule(&atchan->tasklet);
+			ret = IRQ_HANDLED;
+		}
+
+	} while (pending);
+
+	return ret;
+}
+
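+/*
+ * Illustrative example of the scan above: if GIS reads 0x00000005 while GIM
+ * reads 0x00000004, pending is 0x4, so only channel 2 gets its status latched
+ * and its tasklet scheduled; channel 0's status bit is ignored because its
+ * interrupt is not enabled in GIM.
+ */
+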
+static void at_xdmac_issue_pending(struct dma_chan *chan)
+{
+	struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+
+	dev_dbg(chan2dev(&atchan->chan), "%s\n", __func__);
+
+	if (!at_xdmac_chan_is_cyclic(atchan))
+		at_xdmac_advance_work(atchan);
+
+	return;
+}
+
+static int at_xdmac_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			    unsigned long arg)
+{
+	struct at_xdmac_desc	*desc, *_desc;
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
+	int			ret = 0;
+
+	dev_dbg(chan2dev(chan), "%s: cmd=%d\n", __func__, cmd);
+
+	spin_lock_bh(&atchan->lock);
+
+	switch (cmd) {
+	case DMA_PAUSE:
+		at_xdmac_write(atxdmac, AT_XDMAC_GRWS, atchan->mask);
+		set_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
+		break;
+
+	case DMA_RESUME:
+		if (!at_xdmac_chan_is_paused(atchan))
+			break;
+
+		at_xdmac_write(atxdmac, AT_XDMAC_GRWR, atchan->mask);
+		clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
+		break;
+
+	case DMA_TERMINATE_ALL:
+		at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
+		while (at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask)
+			cpu_relax();
+
+		/* Cancel all pending transfers. */
+		list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node)
+			at_xdmac_remove_xfer(atchan, desc);
+
+		clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status);
+		break;
+
+	case DMA_SLAVE_CONFIG:
+		ret = at_xdmac_set_slave_config(chan,
+				(struct dma_slave_config *)arg);
+		break;
+
+	default:
+		dev_err(chan2dev(chan),
+			"unmanaged or unknown dma control cmd: %d\n", cmd);
+		ret = -ENXIO;
+	}
+
+	spin_unlock_bh(&atchan->lock);
+
+	return ret;
+}
+
+static int at_xdmac_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac_desc	*desc;
+	int			i;
+
+	spin_lock_bh(&atchan->lock);
+
+	if (at_xdmac_chan_is_enabled(atchan)) {
+		dev_err(chan2dev(chan),
+			"can't allocate channel resources (channel enabled)\n");
+		i = -EIO;
+		goto spin_unlock;
+	}
+
+	if (!list_empty(&atchan->free_descs_list)) {
+		dev_err(chan2dev(chan),
+			"can't allocate channel resources (channel not free from a previous use)\n");
+		i = -EIO;
+		goto spin_unlock;
+	}
+
+	for (i = 0; i < init_nr_desc_per_channel; i++) {
+		desc = at_xdmac_alloc_desc(chan, GFP_ATOMIC);
+		if (!desc) {
+			dev_warn(chan2dev(chan),
+				"only %d descriptors have been allocated\n", i);
+			break;
+		}
+		list_add_tail(&desc->desc_node, &atchan->free_descs_list);
+	}
+
+	dma_cookie_init(chan);
+
+	dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i);
+
+spin_unlock:
+	spin_unlock_bh(&atchan->lock);
+	return i;
+}
+
+static void at_xdmac_free_chan_resources(struct dma_chan *chan)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac		*atxdmac = to_at_xdmac(chan->device);
+	struct at_xdmac_desc	*desc, *_desc;
+
+	list_for_each_entry_safe(desc, _desc, &atchan->free_descs_list, desc_node) {
+		dev_dbg(chan2dev(chan), "%s: freeing descriptor %p\n", __func__, desc);
+		list_del(&desc->desc_node);
+		dma_pool_free(atxdmac->at_xdmac_desc_pool, desc, desc->tx_dma_desc.phys);
+	}
+
+	return;
+}
+
+#define AT_XDMAC_DMA_BUSWIDTHS\
+	(BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\
+	BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |\
+	BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |\
+	BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |\
+	BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+
+static int at_xdmac_device_slave_caps(struct dma_chan *dchan,
+				      struct dma_slave_caps *caps)
+{
+	caps->src_addr_widths = AT_XDMAC_DMA_BUSWIDTHS;
+	caps->dstn_addr_widths = AT_XDMAC_DMA_BUSWIDTHS;
+	caps->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	caps->cmd_pause = true;
+	caps->cmd_terminate = true;
+	caps->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int atmel_xdmac_prepare(struct device *dev)
+{
+	struct platform_device	*pdev = to_platform_device(dev);
+	struct at_xdmac		*atxdmac = platform_get_drvdata(pdev);
+	struct dma_chan		*chan, *_chan;
+
+	list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
+		struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+
+		/* Wait for transfer completion, except in cyclic case. */
+		if (at_xdmac_chan_is_enabled(atchan) && !at_xdmac_chan_is_cyclic(atchan))
+			return -EAGAIN;
+	}
+	return 0;
+}
+#else
+#	define atmel_xdmac_prepare NULL
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+static int atmel_xdmac_suspend(struct device *dev)
+{
+	struct platform_device	*pdev = to_platform_device(dev);
+	struct at_xdmac		*atxdmac = platform_get_drvdata(pdev);
+	struct dma_chan		*chan, *_chan;
+
+	list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
+		struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+
+		if (at_xdmac_chan_is_cyclic(atchan)) {
+			if (!at_xdmac_chan_is_paused(atchan))
+				at_xdmac_control(chan, DMA_PAUSE, 0);
+			atchan->save_cim = at_xdmac_chan_read(atchan, AT_XDMAC_CIM);
+			atchan->save_cnda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA);
+			atchan->save_cndc = at_xdmac_chan_read(atchan, AT_XDMAC_CNDC);
+		}
+	}
+	atxdmac->save_gim = at_xdmac_read(atxdmac, AT_XDMAC_GIM);
+
+	at_xdmac_off(atxdmac);
+	clk_disable_unprepare(atxdmac->clk);
+	return 0;
+}
+
+static int atmel_xdmac_resume(struct device *dev)
+{
+	struct platform_device	*pdev = to_platform_device(dev);
+	struct at_xdmac		*atxdmac = platform_get_drvdata(pdev);
+	struct at_xdmac_chan	*atchan;
+	struct dma_chan		*chan, *_chan;
+	int			i;
+	u32			cfg;
+
+	clk_prepare_enable(atxdmac->clk);
+
+	/* Clear pending interrupts. */
+	for (i = 0; i < atxdmac->dma.chancnt; i++) {
+		atchan = &atxdmac->chan[i];
+		while (at_xdmac_chan_read(atchan, AT_XDMAC_CIS))
+			cpu_relax();
+	}
+
+	at_xdmac_write(atxdmac, AT_XDMAC_GIE, atxdmac->save_gim);
+	at_xdmac_write(atxdmac, AT_XDMAC_GE, atxdmac->save_gs);
+	list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
+		atchan = to_at_xdmac_chan(chan);
+		cfg = atchan->cfg[AT_XDMAC_CUR_CFG];
+		at_xdmac_chan_write(atchan, AT_XDMAC_CC, cfg);
+		if (at_xdmac_chan_is_cyclic(atchan)) {
+			at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda);
+			at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc);
+			at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim);
+			wmb();
+			at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask);
+		}
+	}
+	return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static int at_xdmac_probe(struct platform_device *pdev)
+{
+	struct resource	*res;
+	struct at_xdmac	*atxdmac;
+	int		irq, size, nr_channels, i, ret;
+	void __iomem	*base;
+	u32		reg;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	/*
+	 * Read the number of xdmac channels; the read helper can't be used
+	 * since atxdmac is not yet allocated and we need to know the number
+	 * of channels to do the allocation.
+	 */
+	reg = readl_relaxed(base + AT_XDMAC_GTYPE);
+	nr_channels = AT_XDMAC_NB_CH(reg);
+	if (nr_channels > AT_XDMAC_MAX_CHAN) {
+		dev_err(&pdev->dev, "invalid number of channels (%u)\n",
+			nr_channels);
+		return -EINVAL;
+	}
+
+	size = sizeof(*atxdmac);
+	size += nr_channels * sizeof(struct at_xdmac_chan);
+	atxdmac = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
+	if (!atxdmac) {
+		dev_err(&pdev->dev, "can't allocate at_xdmac structure\n");
+		return -ENOMEM;
+	}
+
+	atxdmac->regs = base;
+	atxdmac->irq = irq;
+
+	atxdmac->clk = devm_clk_get(&pdev->dev, "dma_clk");
+	if (IS_ERR(atxdmac->clk)) {
+		dev_err(&pdev->dev, "can't get dma_clk\n");
+		return PTR_ERR(atxdmac->clk);
+	}
+
+	/* Do not use devres for the irq to prevent races with the tasklet. */
+	ret = request_irq(atxdmac->irq, at_xdmac_interrupt, 0, "at_xdmac", atxdmac);
+	if (ret) {
+		dev_err(&pdev->dev, "can't request irq\n");
+		return ret;
+	}
+
+	ret = clk_prepare_enable(atxdmac->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "can't prepare or enable clock\n");
+		goto err_free_irq;
+	}
+
+	atxdmac->at_xdmac_desc_pool =
+		dmam_pool_create(dev_name(&pdev->dev), &pdev->dev,
+				sizeof(struct at_xdmac_desc), 4, 0);
+	if (!atxdmac->at_xdmac_desc_pool) {
+		dev_err(&pdev->dev, "no memory for descriptors dma pool\n");
+		ret = -ENOMEM;
+		goto err_clk_disable;
+	}
+
+	dma_cap_set(DMA_CYCLIC, atxdmac->dma.cap_mask);
+	dma_cap_set(DMA_MEMCPY, atxdmac->dma.cap_mask);
+	dma_cap_set(DMA_SLAVE, atxdmac->dma.cap_mask);
+	/*
+	 * Without DMA_PRIVATE the driver is not able to allocate more than
+	 * one channel: the second allocation fails in private_candidate().
+	 */
+	dma_cap_set(DMA_PRIVATE, atxdmac->dma.cap_mask);
+	atxdmac->dma.dev				= &pdev->dev;
+	atxdmac->dma.device_alloc_chan_resources	= at_xdmac_alloc_chan_resources;
+	atxdmac->dma.device_free_chan_resources		= at_xdmac_free_chan_resources;
+	atxdmac->dma.device_tx_status			= at_xdmac_tx_status;
+	atxdmac->dma.device_issue_pending		= at_xdmac_issue_pending;
+	atxdmac->dma.device_prep_dma_cyclic		= at_xdmac_prep_dma_cyclic;
+	atxdmac->dma.device_prep_dma_memcpy		= at_xdmac_prep_dma_memcpy;
+	atxdmac->dma.device_prep_slave_sg		= at_xdmac_prep_slave_sg;
+	atxdmac->dma.device_control			= at_xdmac_control;
+	atxdmac->dma.device_slave_caps			= at_xdmac_device_slave_caps;
+
+	/* Disable all chans and interrupts. */
+	at_xdmac_off(atxdmac);
+
+	/* Init channels. */
+	INIT_LIST_HEAD(&atxdmac->dma.channels);
+	for (i = 0; i < nr_channels; i++) {
+		struct at_xdmac_chan *atchan = &atxdmac->chan[i];
+
+		atchan->chan.device = &atxdmac->dma;
+		list_add_tail(&atchan->chan.device_node,
+			      &atxdmac->dma.channels);
+
+		atchan->ch_regs = at_xdmac_chan_reg_base(atxdmac, i);
+		atchan->mask = 1 << i;
+
+		spin_lock_init(&atchan->lock);
+		INIT_LIST_HEAD(&atchan->xfers_list);
+		INIT_LIST_HEAD(&atchan->free_descs_list);
+		tasklet_init(&atchan->tasklet, at_xdmac_tasklet,
+			     (unsigned long)atchan);
+
+		/* Clear pending interrupts. */
+		while (at_xdmac_chan_read(atchan, AT_XDMAC_CIS))
+			cpu_relax();
+	}
+	platform_set_drvdata(pdev, atxdmac);
+
+	ret = dma_async_device_register(&atxdmac->dma);
+	if (ret) {
+		dev_err(&pdev->dev, "fail to register DMA engine device\n");
+		goto err_clk_disable;
+	}
+
+	ret = of_dma_controller_register(pdev->dev.of_node,
+					 at_xdmac_xlate, atxdmac);
+	if (ret) {
+		dev_err(&pdev->dev, "could not register of dma controller\n");
+		goto err_dma_unregister;
+	}
+
+	dev_info(&pdev->dev, "%d channels, mapped at 0x%p\n",
+		 nr_channels, atxdmac->regs);
+
+	return 0;
+
+err_dma_unregister:
+	dma_async_device_unregister(&atxdmac->dma);
+err_clk_disable:
+	clk_disable_unprepare(atxdmac->clk);
+err_free_irq:
+	free_irq(atxdmac->irq, atxdmac);
+	return ret;
+}
+
+static int at_xdmac_remove(struct platform_device *pdev)
+{
+	struct at_xdmac	*atxdmac = (struct at_xdmac *)platform_get_drvdata(pdev);
+	int		i;
+
+	at_xdmac_off(atxdmac);
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&atxdmac->dma);
+	clk_disable_unprepare(atxdmac->clk);
+
+	synchronize_irq(atxdmac->irq);
+
+	free_irq(atxdmac->irq, atxdmac);
+
+	for (i = 0; i < atxdmac->dma.chancnt; i++) {
+		struct at_xdmac_chan *atchan = &atxdmac->chan[i];
+
+		tasklet_kill(&atchan->tasklet);
+		at_xdmac_free_chan_resources(&atchan->chan);
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops atmel_xdmac_dev_pm_ops = {
+	.prepare	= atmel_xdmac_prepare,
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(atmel_xdmac_suspend, atmel_xdmac_resume)
+};
+
+static const struct of_device_id atmel_xdmac_dt_ids[] = {
+	{
+		.compatible = "atmel,sama5d4-dma",
+	}, {
+		/* sentinel */
+	}
+};
+MODULE_DEVICE_TABLE(of, atmel_xdmac_dt_ids);
+
+static struct platform_driver at_xdmac_driver = {
+	.probe		= at_xdmac_probe,
+	.remove		= at_xdmac_remove,
+	.driver = {
+		.name		= "at_xdmac",
+		.owner		= THIS_MODULE,
+		.of_match_table	= of_match_ptr(atmel_xdmac_dt_ids),
+		.pm		= &atmel_xdmac_dev_pm_ops,
+	}
+};
+
+static int __init at_xdmac_init(void)
+{
+	return platform_driver_probe(&at_xdmac_driver, at_xdmac_probe);
+}
+subsys_initcall(at_xdmac_init);
+
+MODULE_DESCRIPTION("Atmel Extended DMA Controller driver");
+MODULE_AUTHOR("Ludovic Desroches <ludovic.desroches@atmel.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
index 6800797..918b7b3 100644
--- a/drivers/dma/bcm2835-dma.c
+++ b/drivers/dma/bcm2835-dma.c
@@ -525,8 +525,6 @@
 	vchan_init(&c->vc, &d->ddev);
 	INIT_LIST_HEAD(&c->node);
 
-	d->ddev.chancnt++;
-
 	c->chan_base = BCM2835_DMA_CHANIO(d->base, chan_id);
 	c->ch = chan_id;
 	c->irq_number = irq;
@@ -694,7 +692,6 @@
 	.remove	= bcm2835_dma_remove,
 	.driver = {
 		.name = "bcm2835-dma",
-		.owner = THIS_MODULE,
 		.of_match_table = of_match_ptr(bcm2835_dma_of_match),
 	},
 };
diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index a58eec3..b743adf 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -1,3 +1,4 @@
+#include <linux/delay.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
@@ -567,7 +568,7 @@
 		reg |= GCR_TEARDOWN;
 		cppi_writel(reg, c->gcr_reg);
 		c->td_queued = 1;
-		c->td_retry = 100;
+		c->td_retry = 500;
 	}
 
 	if (!c->td_seen || !c->td_desc_seen) {
@@ -603,12 +604,16 @@
 	 * descriptor before the TD we fetch it from enqueue, it has to be
 	 * there waiting for us.
 	 */
-	if (!c->td_seen && c->td_retry)
+	if (!c->td_seen && c->td_retry) {
+		udelay(1);
 		return -EAGAIN;
-
+	}
 	WARN_ON(!c->td_retry);
+
 	if (!c->td_desc_seen) {
 		desc_phys = cppi41_pop_desc(cdd, c->q_num);
+		if (!desc_phys)
+			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
 		WARN_ON(!desc_phys);
 	}
 
@@ -1088,7 +1093,6 @@
 	.remove = cppi41_dma_remove,
 	.driver = {
 		.name = "cppi41-dma-engine",
-		.owner = THIS_MODULE,
 		.pm = &cppi41_pm_ops,
 		.of_match_table = of_match_ptr(cppi41_dma_ids),
 	},
diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c
index ae2ab14..bdeafee 100644
--- a/drivers/dma/dma-jz4740.c
+++ b/drivers/dma/dma-jz4740.c
@@ -563,10 +563,9 @@
 	dd->device_prep_dma_cyclic = jz4740_dma_prep_dma_cyclic;
 	dd->device_control = jz4740_dma_control;
 	dd->dev = &pdev->dev;
-	dd->chancnt = JZ_DMA_NR_CHANS;
 	INIT_LIST_HEAD(&dd->channels);
 
-	for (i = 0; i < dd->chancnt; i++) {
+	for (i = 0; i < JZ_DMA_NR_CHANS; i++) {
 		chan = &dmadev->chan[i];
 		chan->id = i;
 		chan->vchan.desc_free = jz4740_dma_desc_free;
@@ -608,7 +607,6 @@
 	.remove = jz4740_dma_remove,
 	.driver = {
 		.name = "jz4740-dma",
-		.owner = THIS_MODULE,
 	},
 };
 module_platform_driver(jz4740_dma_driver);
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 24bfaf0..e057935 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -330,8 +330,7 @@
 	if (err) {
 		pr_err("initialization failure\n");
 		for_each_dma_cap_mask(cap, dma_cap_mask_all)
-			if (channel_table[cap])
-				free_percpu(channel_table[cap]);
+			free_percpu(channel_table[cap]);
 	}
 
 	return err;
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c
index 3c5711d..6fb2e90 100644
--- a/drivers/dma/fsl-edma.c
+++ b/drivers/dma/fsl-edma.c
@@ -118,17 +118,17 @@
 				BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)
 
 struct fsl_edma_hw_tcd {
-	u32	saddr;
-	u16	soff;
-	u16	attr;
-	u32	nbytes;
-	u32	slast;
-	u32	daddr;
-	u16	doff;
-	u16	citer;
-	u32	dlast_sga;
-	u16	csr;
-	u16	biter;
+	__le32	saddr;
+	__le16	soff;
+	__le16	attr;
+	__le32	nbytes;
+	__le32	slast;
+	__le32	daddr;
+	__le16	doff;
+	__le16	citer;
+	__le32	dlast_sga;
+	__le16	csr;
+	__le16	biter;
 };
 
 struct fsl_edma_sw_tcd {
@@ -175,18 +175,12 @@
 };
 
 /*
- * R/W functions for big- or little-endian registers
- * the eDMA controller's endian is independent of the CPU core's endian.
+ * R/W functions for big- or little-endian registers:
+ * The eDMA controller's endianness is independent of the CPU core's.
+ * For the big-endian IP module, the offset of 8-bit and 16-bit registers
+ * is also swapped relative to the little-endian IP.
  */
 
-static u16 edma_readw(struct fsl_edma_engine *edma, void __iomem *addr)
-{
-	if (edma->big_endian)
-		return ioread16be(addr);
-	else
-		return ioread16(addr);
-}
-
 static u32 edma_readl(struct fsl_edma_engine *edma, void __iomem *addr)
 {
 	if (edma->big_endian)
@@ -197,13 +191,18 @@
 
 static void edma_writeb(struct fsl_edma_engine *edma, u8 val, void __iomem *addr)
 {
-	iowrite8(val, addr);
+	/* swap the reg offset for these in big-endian mode */
+	if (edma->big_endian)
+		iowrite8(val, (void __iomem *)((unsigned long)addr ^ 0x3));
+	else
+		iowrite8(val, addr);
 }
 
 static void edma_writew(struct fsl_edma_engine *edma, u16 val, void __iomem *addr)
 {
+	/* swap the reg offset for these in big-endian mode */
 	if (edma->big_endian)
-		iowrite16be(val, addr);
+		iowrite16be(val, (void __iomem *)((unsigned long)addr ^ 0x2));
 	else
 		iowrite16(val, addr);
 }
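+
+/*
+ * Illustrative example of the offset swap above: a 16-bit register located at
+ * byte offset 0x1E on the little-endian IP is accessed at 0x1E ^ 0x2 = 0x1C on
+ * the big-endian IP, and an 8-bit register at offset 0x01 moves to
+ * 0x01 ^ 0x3 = 0x02.
+ */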
@@ -254,13 +253,12 @@
 	chans_per_mux = fsl_chan->edma->n_chans / DMAMUX_NR;
 	ch_off = fsl_chan->vchan.chan.chan_id % chans_per_mux;
 	muxaddr = fsl_chan->edma->muxbase[ch / chans_per_mux];
+	slot = EDMAMUX_CHCFG_SOURCE(slot);
 
 	if (enable)
-		edma_writeb(fsl_chan->edma,
-				EDMAMUX_CHCFG_ENBL | EDMAMUX_CHCFG_SOURCE(slot),
-				muxaddr + ch_off);
+		iowrite8(EDMAMUX_CHCFG_ENBL | slot, muxaddr + ch_off);
 	else
-		edma_writeb(fsl_chan->edma, EDMAMUX_CHCFG_DIS, muxaddr + ch_off);
+		iowrite8(EDMAMUX_CHCFG_DIS, muxaddr + ch_off);
 }
 
 static unsigned int fsl_edma_get_tcd_attr(enum dma_slave_buswidth addr_width)
@@ -286,9 +284,8 @@
 
 	fsl_desc = to_fsl_edma_desc(vdesc);
 	for (i = 0; i < fsl_desc->n_tcds; i++)
-			dma_pool_free(fsl_desc->echan->tcd_pool,
-					fsl_desc->tcd[i].vtcd,
-					fsl_desc->tcd[i].ptcd);
+		dma_pool_free(fsl_desc->echan->tcd_pool, fsl_desc->tcd[i].vtcd,
+			      fsl_desc->tcd[i].ptcd);
 	kfree(fsl_desc);
 }
 
@@ -363,8 +360,8 @@
 
 	/* calculate the total size in this desc */
 	for (len = i = 0; i < fsl_chan->edesc->n_tcds; i++)
-		len += edma_readl(fsl_chan->edma, &(edesc->tcd[i].vtcd->nbytes))
-			* edma_readw(fsl_chan->edma, &(edesc->tcd[i].vtcd->biter));
+		len += le32_to_cpu(edesc->tcd[i].vtcd->nbytes)
+			* le16_to_cpu(edesc->tcd[i].vtcd->biter);
 
 	if (!in_progress)
 		return len;
@@ -376,17 +373,15 @@
 
 	/* figure out the finished and calculate the residue */
 	for (i = 0; i < fsl_chan->edesc->n_tcds; i++) {
-		size = edma_readl(fsl_chan->edma, &(edesc->tcd[i].vtcd->nbytes))
-			* edma_readw(fsl_chan->edma, &(edesc->tcd[i].vtcd->biter));
+		size = le32_to_cpu(edesc->tcd[i].vtcd->nbytes)
+			* le16_to_cpu(edesc->tcd[i].vtcd->biter);
 		if (dir == DMA_MEM_TO_DEV)
-			dma_addr = edma_readl(fsl_chan->edma,
-					&(edesc->tcd[i].vtcd->saddr));
+			dma_addr = le32_to_cpu(edesc->tcd[i].vtcd->saddr);
 		else
-			dma_addr = edma_readl(fsl_chan->edma,
-					&(edesc->tcd[i].vtcd->daddr));
+			dma_addr = le32_to_cpu(edesc->tcd[i].vtcd->daddr);
 
 		len -= size;
-		if (cur_addr > dma_addr && cur_addr < dma_addr + size) {
+		if (cur_addr >= dma_addr && cur_addr < dma_addr + size) {
 			len += dma_addr + size - cur_addr;
 			break;
 		}
@@ -424,55 +419,67 @@
 	return fsl_chan->status;
 }
 
-static void fsl_edma_set_tcd_params(struct fsl_edma_chan *fsl_chan,
-		u32 src, u32 dst, u16 attr, u16 soff, u32 nbytes,
-		u32 slast, u16 citer, u16 biter, u32 doff, u32 dlast_sga,
-		u16 csr)
+static void fsl_edma_set_tcd_regs(struct fsl_edma_chan *fsl_chan,
+				  struct fsl_edma_hw_tcd *tcd)
 {
+	struct fsl_edma_engine *edma = fsl_chan->edma;
 	void __iomem *addr = fsl_chan->edma->membase;
 	u32 ch = fsl_chan->vchan.chan.chan_id;
 
 	/*
-	 * TCD parameters have been swapped in fill_tcd_params(),
-	 * so just write them to registers in the cpu endian here
+	 * TCD parameters are stored in struct fsl_edma_hw_tcd in little-endian
+	 * format. However, the TCD registers must be loaded in big- or
+	 * little-endian, matching the endianness of the eDMA engine model.
 	 */
-	writew(0, addr + EDMA_TCD_CSR(ch));
-	writel(src, addr + EDMA_TCD_SADDR(ch));
-	writel(dst, addr + EDMA_TCD_DADDR(ch));
-	writew(attr, addr + EDMA_TCD_ATTR(ch));
-	writew(soff, addr + EDMA_TCD_SOFF(ch));
-	writel(nbytes, addr + EDMA_TCD_NBYTES(ch));
-	writel(slast, addr + EDMA_TCD_SLAST(ch));
-	writew(citer, addr + EDMA_TCD_CITER(ch));
-	writew(biter, addr + EDMA_TCD_BITER(ch));
-	writew(doff, addr + EDMA_TCD_DOFF(ch));
-	writel(dlast_sga, addr + EDMA_TCD_DLAST_SGA(ch));
-	writew(csr, addr + EDMA_TCD_CSR(ch));
+	edma_writew(edma, 0, addr + EDMA_TCD_CSR(ch));
+	edma_writel(edma, le32_to_cpu(tcd->saddr), addr + EDMA_TCD_SADDR(ch));
+	edma_writel(edma, le32_to_cpu(tcd->daddr), addr + EDMA_TCD_DADDR(ch));
+
+	edma_writew(edma, le16_to_cpu(tcd->attr), addr + EDMA_TCD_ATTR(ch));
+	edma_writew(edma, le16_to_cpu(tcd->soff), addr + EDMA_TCD_SOFF(ch));
+
+	edma_writel(edma, le32_to_cpu(tcd->nbytes), addr + EDMA_TCD_NBYTES(ch));
+	edma_writel(edma, le32_to_cpu(tcd->slast), addr + EDMA_TCD_SLAST(ch));
+
+	edma_writew(edma, le16_to_cpu(tcd->citer), addr + EDMA_TCD_CITER(ch));
+	edma_writew(edma, le16_to_cpu(tcd->biter), addr + EDMA_TCD_BITER(ch));
+	edma_writew(edma, le16_to_cpu(tcd->doff), addr + EDMA_TCD_DOFF(ch));
+
+	edma_writel(edma, le32_to_cpu(tcd->dlast_sga), addr + EDMA_TCD_DLAST_SGA(ch));
+
+	edma_writew(edma, le16_to_cpu(tcd->csr), addr + EDMA_TCD_CSR(ch));
 }
 
-static void fill_tcd_params(struct fsl_edma_engine *edma,
-		struct fsl_edma_hw_tcd *tcd, u32 src, u32 dst,
-		u16 attr, u16 soff, u32 nbytes, u32 slast, u16 citer,
-		u16 biter, u16 doff, u32 dlast_sga, bool major_int,
-		bool disable_req, bool enable_sg)
+static inline
+void fsl_edma_fill_tcd(struct fsl_edma_hw_tcd *tcd, u32 src, u32 dst,
+		       u16 attr, u16 soff, u32 nbytes, u32 slast, u16 citer,
+		       u16 biter, u16 doff, u32 dlast_sga, bool major_int,
+		       bool disable_req, bool enable_sg)
 {
 	u16 csr = 0;
 
 	/*
-	 * eDMA hardware SGs require the TCD parameters stored in memory
-	 * the same endian as the eDMA module so that they can be loaded
-	 * automatically by the engine
+	 * eDMA hardware SGs require the TCDs to be stored in little-endian
+	 * format irrespective of the register endian model. So we store the
+	 * values in little-endian in memory and let fsl_edma_set_tcd_regs()
+	 * do any needed swap when loading the registers.
 	 */
-	edma_writel(edma, src, &(tcd->saddr));
-	edma_writel(edma, dst, &(tcd->daddr));
-	edma_writew(edma, attr, &(tcd->attr));
-	edma_writew(edma, EDMA_TCD_SOFF_SOFF(soff), &(tcd->soff));
-	edma_writel(edma, EDMA_TCD_NBYTES_NBYTES(nbytes), &(tcd->nbytes));
-	edma_writel(edma, EDMA_TCD_SLAST_SLAST(slast), &(tcd->slast));
-	edma_writew(edma, EDMA_TCD_CITER_CITER(citer), &(tcd->citer));
-	edma_writew(edma, EDMA_TCD_DOFF_DOFF(doff), &(tcd->doff));
-	edma_writel(edma, EDMA_TCD_DLAST_SGA_DLAST_SGA(dlast_sga), &(tcd->dlast_sga));
-	edma_writew(edma, EDMA_TCD_BITER_BITER(biter), &(tcd->biter));
+	tcd->saddr = cpu_to_le32(src);
+	tcd->daddr = cpu_to_le32(dst);
+
+	tcd->attr = cpu_to_le16(attr);
+
+	tcd->soff = cpu_to_le16(EDMA_TCD_SOFF_SOFF(soff));
+
+	tcd->nbytes = cpu_to_le32(EDMA_TCD_NBYTES_NBYTES(nbytes));
+	tcd->slast = cpu_to_le32(EDMA_TCD_SLAST_SLAST(slast));
+
+	tcd->citer = cpu_to_le16(EDMA_TCD_CITER_CITER(citer));
+	tcd->doff = cpu_to_le16(EDMA_TCD_DOFF_DOFF(doff));
+
+	tcd->dlast_sga = cpu_to_le32(EDMA_TCD_DLAST_SGA_DLAST_SGA(dlast_sga));
+
+	tcd->biter = cpu_to_le16(EDMA_TCD_BITER_BITER(biter));
 	if (major_int)
 		csr |= EDMA_TCD_CSR_INT_MAJOR;
 
@@ -482,7 +489,7 @@
 	if (enable_sg)
 		csr |= EDMA_TCD_CSR_E_SG;
 
-	edma_writew(edma, csr, &(tcd->csr));
+	tcd->csr = cpu_to_le16(csr);
 }
 
 static struct fsl_edma_desc *fsl_edma_alloc_desc(struct fsl_edma_chan *fsl_chan,
@@ -558,9 +565,9 @@
 			doff = fsl_chan->fsc.addr_width;
 		}
 
-		fill_tcd_params(fsl_chan->edma, fsl_desc->tcd[i].vtcd, src_addr,
-				dst_addr, fsl_chan->fsc.attr, soff, nbytes, 0,
-				iter, iter, doff, last_sg, true, false, true);
+		fsl_edma_fill_tcd(fsl_desc->tcd[i].vtcd, src_addr, dst_addr,
+				  fsl_chan->fsc.attr, soff, nbytes, 0, iter,
+				  iter, doff, last_sg, true, false, true);
 		dma_buf_next += period_len;
 	}
 
@@ -607,16 +614,16 @@
 		iter = sg_dma_len(sg) / nbytes;
 		if (i < sg_len - 1) {
 			last_sg = fsl_desc->tcd[(i + 1)].ptcd;
-			fill_tcd_params(fsl_chan->edma, fsl_desc->tcd[i].vtcd,
-					src_addr, dst_addr, fsl_chan->fsc.attr,
-					soff, nbytes, 0, iter, iter, doff, last_sg,
-					false, false, true);
+			fsl_edma_fill_tcd(fsl_desc->tcd[i].vtcd, src_addr,
+					  dst_addr, fsl_chan->fsc.attr, soff,
+					  nbytes, 0, iter, iter, doff, last_sg,
+					  false, false, true);
 		} else {
 			last_sg = 0;
-			fill_tcd_params(fsl_chan->edma, fsl_desc->tcd[i].vtcd,
-					src_addr, dst_addr, fsl_chan->fsc.attr,
-					soff, nbytes, 0, iter, iter, doff, last_sg,
-					true, true, false);
+			fsl_edma_fill_tcd(fsl_desc->tcd[i].vtcd, src_addr,
+					  dst_addr, fsl_chan->fsc.attr, soff,
+					  nbytes, 0, iter, iter, doff, last_sg,
+					  true, true, false);
 		}
 	}
 
@@ -625,17 +632,13 @@
 
 static void fsl_edma_xfer_desc(struct fsl_edma_chan *fsl_chan)
 {
-	struct fsl_edma_hw_tcd *tcd;
 	struct virt_dma_desc *vdesc;
 
 	vdesc = vchan_next_desc(&fsl_chan->vchan);
 	if (!vdesc)
 		return;
 	fsl_chan->edesc = to_fsl_edma_desc(vdesc);
-	tcd = fsl_chan->edesc->tcd[0].vtcd;
-	fsl_edma_set_tcd_params(fsl_chan, tcd->saddr, tcd->daddr, tcd->attr,
-			tcd->soff, tcd->nbytes, tcd->slast, tcd->citer,
-			tcd->biter, tcd->doff, tcd->dlast_sga, tcd->csr);
+	fsl_edma_set_tcd_regs(fsl_chan, fsl_chan->edesc->tcd[0].vtcd);
 	fsl_edma_enable_request(fsl_chan);
 	fsl_chan->status = DMA_IN_PROGRESS;
 }
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 994bcb2..3d8feb5 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -1337,7 +1337,6 @@
 
 	/* Add the channel to DMA device channel list */
 	list_add_tail(&chan->common.device_node, &fdev->common.channels);
-	fdev->common.chancnt++;
 
 	dev_info(fdev->dev, "#%d (%s), irq %d\n", chan->id, compatible,
 		 chan->irq != NO_IRQ ? chan->irq : fdev->irq);
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 88afc48..d0df198 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -729,6 +729,7 @@
 	case IMX_DMATYPE_CSPI:
 	case IMX_DMATYPE_EXT:
 	case IMX_DMATYPE_SSI:
+	case IMX_DMATYPE_SAI:
 		per_2_emi = sdma->script_addrs->app_2_mcu_addr;
 		emi_2_per = sdma->script_addrs->mcu_2_app_addr;
 		break;
@@ -1287,7 +1288,8 @@
 	unsigned short *ram_code;
 
 	if (!fw) {
-		dev_err(sdma->dev, "firmware not found\n");
+		dev_info(sdma->dev, "external firmware not found, using ROM firmware\n");
+		/* In this case we just use the ROM firmware. */
 		return;
 	}
 
@@ -1346,7 +1348,7 @@
 	return ret;
 }
 
-static int __init sdma_init(struct sdma_engine *sdma)
+static int sdma_init(struct sdma_engine *sdma)
 {
 	int i, ret;
 	dma_addr_t ccb_phys;
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index 895f869..32eae38 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -1265,9 +1265,17 @@
 	op = IOAT_OP_XOR;
 
 	dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, dest_dma))
+		goto dma_unmap;
+
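+	/*
+	 * Pre-fill dma_srcs[] with DMA_ERROR_CODE so the dma_unmap error path
+	 * below can tell which pages were actually mapped.
+	 */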
 	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+		dma_srcs[i] = DMA_ERROR_CODE;
+	for (i = 0; i < IOAT_NUM_SRC_TEST; i++) {
 		dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
 					   DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, dma_srcs[i]))
+			goto dma_unmap;
+	}
 	tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
 				      IOAT_NUM_SRC_TEST, PAGE_SIZE,
 				      DMA_PREP_INTERRUPT);
@@ -1298,7 +1306,6 @@
 		goto dma_unmap;
 	}
 
-	dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
 	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
 		dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
 
@@ -1313,6 +1320,8 @@
 	}
 	dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
 
+	dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
 	/* skip validate if the capability is not present */
 	if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
 		goto free_resources;
@@ -1327,8 +1336,13 @@
 	xor_val_result = 1;
 
 	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+		dma_srcs[i] = DMA_ERROR_CODE;
+	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
 		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
 					   DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, dma_srcs[i]))
+			goto dma_unmap;
+	}
 	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
 					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
 					  &xor_val_result, DMA_PREP_INTERRUPT);
@@ -1374,8 +1388,13 @@
 
 	xor_val_result = 0;
 	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+		dma_srcs[i] = DMA_ERROR_CODE;
+	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
 		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
 					   DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, dma_srcs[i]))
+			goto dma_unmap;
+	}
 	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
 					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
 					  &xor_val_result, DMA_PREP_INTERRUPT);
@@ -1417,14 +1436,18 @@
 	goto free_resources;
 dma_unmap:
 	if (op == IOAT_OP_XOR) {
-		dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+		if (dest_dma != DMA_ERROR_CODE)
+			dma_unmap_page(dev, dest_dma, PAGE_SIZE,
+				       DMA_FROM_DEVICE);
 		for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
-			dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
-				       DMA_TO_DEVICE);
+			if (dma_srcs[i] != DMA_ERROR_CODE)
+				dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
+					       DMA_TO_DEVICE);
 	} else if (op == IOAT_OP_XOR_VAL) {
 		for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
-			dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
-				       DMA_TO_DEVICE);
+			if (dma_srcs[i] != DMA_ERROR_CODE)
+				dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
+					       DMA_TO_DEVICE);
 	}
 free_resources:
 	dma->device_free_chan_resources(dma_chan);
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index c56137b..263d9f6 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -1557,7 +1557,6 @@
 	.probe		= iop_adma_probe,
 	.remove		= iop_adma_remove,
 	.driver		= {
-		.owner	= THIS_MODULE,
 		.name	= "iop-adma",
 	},
 };
diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
index a1f911a..a1de14a 100644
--- a/drivers/dma/k3dma.c
+++ b/drivers/dma/k3dma.c
@@ -722,7 +722,6 @@
 	d->slave.device_issue_pending = k3_dma_issue_pending;
 	d->slave.device_control = k3_dma_control;
 	d->slave.copy_align = DMA_ALIGN;
-	d->slave.chancnt = d->dma_requests;
 
 	/* init virtual channel */
 	d->chans = devm_kzalloc(&op->dev,
@@ -787,6 +786,7 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int k3_dma_suspend(struct device *dev)
 {
 	struct k3_dma_dev *d = dev_get_drvdata(dev);
@@ -816,13 +816,13 @@
 	k3_dma_enable_dma(d, true);
 	return 0;
 }
+#endif
 
 static SIMPLE_DEV_PM_OPS(k3_dma_pmops, k3_dma_suspend, k3_dma_resume);
 
 static struct platform_driver k3_pdma_driver = {
 	.driver		= {
 		.name	= DRIVER_NAME,
-		.owner  = THIS_MODULE,
 		.pm	= &k3_dma_pmops,
 		.of_match_table = k3_pdma_dt_ids,
 	},
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
index a1a4db5..8b8952f 100644
--- a/drivers/dma/mmp_pdma.c
+++ b/drivers/dma/mmp_pdma.c
@@ -1098,7 +1098,6 @@
 static struct platform_driver mmp_pdma_driver = {
 	.driver		= {
 		.name	= "mmp-pdma",
-		.owner  = THIS_MODULE,
 		.of_match_table = mmp_pdma_dt_ids,
 	},
 	.id_table	= mmp_pdma_id_table,
diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c
index c6bd015..bfb4695 100644
--- a/drivers/dma/mmp_tdma.c
+++ b/drivers/dma/mmp_tdma.c
@@ -703,7 +703,6 @@
 static struct platform_driver mmp_tdma_driver = {
 	.driver		= {
 		.name	= "mmp-tdma",
-		.owner  = THIS_MODULE,
 		.of_match_table = mmp_tdma_dt_ids,
 	},
 	.id_table	= mmp_tdma_id_table,
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
index 881db2b..01bec40 100644
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c
@@ -885,6 +885,7 @@
 	struct resource res;
 	ulong regs_start, regs_size;
 	int retval, i;
+	u8 chancnt;
 
 	mdma = devm_kzalloc(dev, sizeof(struct mpc_dma), GFP_KERNEL);
 	if (!mdma) {
@@ -956,10 +957,6 @@
 
 	dma = &mdma->dma;
 	dma->dev = dev;
-	if (mdma->is_mpc8308)
-		dma->chancnt = MPC8308_DMACHAN_MAX;
-	else
-		dma->chancnt = MPC512x_DMACHAN_MAX;
 	dma->device_alloc_chan_resources = mpc_dma_alloc_chan_resources;
 	dma->device_free_chan_resources = mpc_dma_free_chan_resources;
 	dma->device_issue_pending = mpc_dma_issue_pending;
@@ -972,7 +969,12 @@
 	dma_cap_set(DMA_MEMCPY, dma->cap_mask);
 	dma_cap_set(DMA_SLAVE, dma->cap_mask);
 
-	for (i = 0; i < dma->chancnt; i++) {
+	if (mdma->is_mpc8308)
+		chancnt = MPC8308_DMACHAN_MAX;
+	else
+		chancnt = MPC512x_DMACHAN_MAX;
+
+	for (i = 0; i < chancnt; i++) {
 		mchan = &mdma->channels[i];
 
 		mchan->chan.device = dma;
@@ -1090,7 +1092,6 @@
 	.remove		= mpc_dma_remove,
 	.driver = {
 		.name = DRV_NAME,
-		.owner = THIS_MODULE,
 		.of_match_table	= mpc_dma_match,
 	},
 };
diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c
index bda20e6..d7d61e1 100644
--- a/drivers/dma/nbpfaxi.c
+++ b/drivers/dma/nbpfaxi.c
@@ -1500,7 +1500,6 @@
 
 static struct platform_driver nbpf_driver = {
 	.driver = {
-		.owner = THIS_MODULE,
 		.name = "dma-nbpf",
 		.of_match_table = nbpf_match,
 		.pm = &nbpf_pm_ops,
diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index bbea824..6ea1ade 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -1074,8 +1074,6 @@
 	vchan_init(&c->vc, &od->ddev);
 	INIT_LIST_HEAD(&c->node);
 
-	od->ddev.chancnt++;
-
 	return 0;
 }
 
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index 9f9ca9f..6e0e47d 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -997,7 +997,7 @@
 #define PCI_DEVICE_ID_ML7831_DMA1_8CH	0x8810
 #define PCI_DEVICE_ID_ML7831_DMA2_4CH	0x8815
 
-const struct pci_device_id pch_dma_id_table[] = {
+static const struct pci_device_id pch_dma_id_table[] = {
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_4CH), 4 },
 	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA1_8CH), 8}, /* UART Video */
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 19a9974..bdf40b5 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -27,6 +27,7 @@
 #include <linux/of.h>
 #include <linux/of_dma.h>
 #include <linux/err.h>
+#include <linux/pm_runtime.h>
 
 #include "dmaengine.h"
 #define PL330_MAX_CHAN		8
@@ -265,6 +266,9 @@
 
 #define NR_DEFAULT_DESC	16
 
+/* Delay for runtime PM autosuspend, ms */
+#define PL330_AUTOSUSPEND_DELAY 20
+
 /* Populated by the PL330 core driver for DMA API driver's info */
 struct pl330_config {
 	u32	periph_id;
@@ -1958,6 +1962,7 @@
 	struct dma_pl330_chan *pch = (struct dma_pl330_chan *)data;
 	struct dma_pl330_desc *desc, *_dt;
 	unsigned long flags;
+	bool power_down = false;
 
 	spin_lock_irqsave(&pch->lock, flags);
 
@@ -1972,10 +1977,17 @@
 	/* Try to submit a req imm. next to the last completed cookie */
 	fill_queue(pch);
 
-	/* Make sure the PL330 Channel thread is active */
-	spin_lock(&pch->thread->dmac->lock);
-	_start(pch->thread);
-	spin_unlock(&pch->thread->dmac->lock);
+	if (list_empty(&pch->work_list)) {
+		spin_lock(&pch->thread->dmac->lock);
+		_stop(pch->thread);
+		spin_unlock(&pch->thread->dmac->lock);
+		power_down = true;
+	} else {
+		/* Make sure the PL330 Channel thread is active */
+		spin_lock(&pch->thread->dmac->lock);
+		_start(pch->thread);
+		spin_unlock(&pch->thread->dmac->lock);
+	}
 
 	while (!list_empty(&pch->completed_list)) {
 		dma_async_tx_callback callback;
@@ -1990,6 +2002,12 @@
 		if (pch->cyclic) {
 			desc->status = PREP;
 			list_move_tail(&desc->node, &pch->work_list);
+			if (power_down) {
+				spin_lock(&pch->thread->dmac->lock);
+				_start(pch->thread);
+				spin_unlock(&pch->thread->dmac->lock);
+				power_down = false;
+			}
 		} else {
 			desc->status = FREE;
 			list_move_tail(&desc->node, &pch->dmac->desc_pool);
@@ -2004,6 +2022,12 @@
 		}
 	}
 	spin_unlock_irqrestore(&pch->lock, flags);
+
+	/* If work list empty, power down */
+	if (power_down) {
+		pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
+		pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
+	}
 }
 
 bool pl330_filter(struct dma_chan *chan, void *param)
@@ -2073,6 +2097,7 @@
 
 	switch (cmd) {
 	case DMA_TERMINATE_ALL:
+		pm_runtime_get_sync(pl330->ddma.dev);
 		spin_lock_irqsave(&pch->lock, flags);
 
 		spin_lock(&pl330->lock);
@@ -2099,10 +2124,15 @@
 			dma_cookie_complete(&desc->txd);
 		}
 
+		if (!list_empty(&pch->work_list))
+			pm_runtime_put(pl330->ddma.dev);
+
 		list_splice_tail_init(&pch->submitted_list, &pl330->desc_pool);
 		list_splice_tail_init(&pch->work_list, &pl330->desc_pool);
 		list_splice_tail_init(&pch->completed_list, &pl330->desc_pool);
 		spin_unlock_irqrestore(&pch->lock, flags);
+		pm_runtime_mark_last_busy(pl330->ddma.dev);
+		pm_runtime_put_autosuspend(pl330->ddma.dev);
 		break;
 	case DMA_SLAVE_CONFIG:
 		slave_config = (struct dma_slave_config *)arg;
@@ -2138,6 +2168,7 @@
 
 	tasklet_kill(&pch->task);
 
+	pm_runtime_get_sync(pch->dmac->ddma.dev);
 	spin_lock_irqsave(&pch->lock, flags);
 
 	pl330_release_channel(pch->thread);
@@ -2147,6 +2178,8 @@
 		list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool);
 
 	spin_unlock_irqrestore(&pch->lock, flags);
+	pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
+	pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
 }
 
 static enum dma_status
@@ -2162,6 +2195,15 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&pch->lock, flags);
+	if (list_empty(&pch->work_list)) {
+		/*
+		 * Warn if nothing is pending: an empty submitted_list would
+		 * unbalance our pm_runtime usage counter, which is updated
+		 * based on whether work_list is empty.
+		 */
+		WARN_ON(list_empty(&pch->submitted_list));
+		pm_runtime_get_sync(pch->dmac->ddma.dev);
+	}
 	list_splice_tail_init(&pch->submitted_list, &pch->work_list);
 	spin_unlock_irqrestore(&pch->lock, flags);
 
@@ -2594,6 +2636,46 @@
 	return 0;
 }
 
+/*
+ * Runtime PM callbacks are provided by the amba/bus.c driver.
+ *
+ * It is assumed here that IRQ-safe runtime PM is chosen in probe and that
+ * the amba bus driver will only disable/enable the clock in its runtime PM
+ * callbacks.
+ */
+static int __maybe_unused pl330_suspend(struct device *dev)
+{
+	struct amba_device *pcdev = to_amba_device(dev);
+
+	pm_runtime_disable(dev);
+
+	if (!pm_runtime_status_suspended(dev)) {
+		/* amba did not disable the clock */
+		amba_pclk_disable(pcdev);
+	}
+	amba_pclk_unprepare(pcdev);
+
+	return 0;
+}
+
+static int __maybe_unused pl330_resume(struct device *dev)
+{
+	struct amba_device *pcdev = to_amba_device(dev);
+	int ret;
+
+	ret = amba_pclk_prepare(pcdev);
+	if (ret)
+		return ret;
+
+	if (!pm_runtime_status_suspended(dev))
+		ret = amba_pclk_enable(pcdev);
+
+	pm_runtime_enable(dev);
+
+	return ret;
+}
+
+static SIMPLE_DEV_PM_OPS(pl330_pm, pl330_suspend, pl330_resume);
+
 static int
 pl330_probe(struct amba_device *adev, const struct amba_id *id)
 {
@@ -2619,6 +2701,9 @@
 		return -ENOMEM;
 	}
 
+	pd = &pl330->ddma;
+	pd->dev = &adev->dev;
+
 	pl330->mcbufsz = pdat ? pdat->mcbuf_sz : 0;
 
 	res = &adev->res;
@@ -2655,7 +2740,6 @@
 	if (!add_desc(pl330, GFP_KERNEL, NR_DEFAULT_DESC))
 		dev_warn(&adev->dev, "unable to allocate desc\n");
 
-	pd = &pl330->ddma;
 	INIT_LIST_HEAD(&pd->channels);
 
 	/* Initialize channel parameters */
@@ -2692,7 +2776,6 @@
 		list_add_tail(&pch->chan.device_node, &pd->channels);
 	}
 
-	pd->dev = &adev->dev;
 	if (pdat) {
 		pd->cap_mask = pdat->cap_mask;
 	} else {
@@ -2747,6 +2830,12 @@
 		pcfg->data_buf_dep, pcfg->data_bus_width / 8, pcfg->num_chan,
 		pcfg->num_peri, pcfg->num_events);
 
+	pm_runtime_irq_safe(&adev->dev);
+	pm_runtime_use_autosuspend(&adev->dev);
+	pm_runtime_set_autosuspend_delay(&adev->dev, PL330_AUTOSUSPEND_DELAY);
+	pm_runtime_mark_last_busy(&adev->dev);
+	pm_runtime_put_autosuspend(&adev->dev);
+
 	return 0;
 probe_err3:
 	/* Idle the DMAC */
@@ -2773,6 +2862,8 @@
 	struct pl330_dmac *pl330 = amba_get_drvdata(adev);
 	struct dma_pl330_chan *pch, *_p;
 
+	pm_runtime_get_noresume(pl330->ddma.dev);
+
 	if (adev->dev.of_node)
 		of_dma_controller_free(adev->dev.of_node);
 
@@ -2811,6 +2902,7 @@
 	.drv = {
 		.owner = THIS_MODULE,
 		.name = "dma-pl330",
+		.pm = &pl330_pm,
 	},
 	.id_table = pl330_ids,
 	.probe = pl330_probe,
@@ -2819,6 +2911,6 @@
 
 module_amba_driver(pl330_driver);
 
-MODULE_AUTHOR("Jaswinder Singh <jassi.brar@samsung.com>");
+MODULE_AUTHOR("Jaswinder Singh <jassisinghbrar@gmail.com>");
 MODULE_DESCRIPTION("API Driver for PL330 DMAC");
 MODULE_LICENSE("GPL");
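
The pl330 hunks above follow the standard runtime-PM autosuspend pattern: probe marks the device IRQ-safe and enables autosuspend, every path that queues work takes a reference with pm_runtime_get_sync(), and the idle paths drop it with pm_runtime_mark_last_busy() plus pm_runtime_put_autosuspend(). A minimal sketch of that pattern, using a hypothetical foo driver rather than pl330 itself:

#include <linux/pm_runtime.h>

static int foo_probe(struct device *dev)
{
	pm_runtime_irq_safe(dev);			/* callbacks may be called with IRQs off */
	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 20);	/* delay in ms; value is illustrative */
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);		/* drop the reference held across probe */
	return 0;
}

static void foo_queue_work(struct device *dev)
{
	pm_runtime_get_sync(dev);			/* keep the device powered while busy */
	/* ... submit the transfer ... */
}

static void foo_work_done(struct device *dev)
{
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);		/* clock gated after the delay expires */
}
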
diff --git a/drivers/dma/qcom_bam_dma.c b/drivers/dma/qcom_bam_dma.c
index 7a4bbb0..3122a99 100644
--- a/drivers/dma/qcom_bam_dma.c
+++ b/drivers/dma/qcom_bam_dma.c
@@ -79,35 +79,97 @@
 	struct bam_desc_hw desc[0];
 };
 
-#define BAM_CTRL			0x0000
-#define BAM_REVISION			0x0004
-#define BAM_SW_REVISION			0x0080
-#define BAM_NUM_PIPES			0x003C
-#define BAM_TIMER			0x0040
-#define BAM_TIMER_CTRL			0x0044
-#define BAM_DESC_CNT_TRSHLD		0x0008
-#define BAM_IRQ_SRCS			0x000C
-#define BAM_IRQ_SRCS_MSK		0x0010
-#define BAM_IRQ_SRCS_UNMASKED		0x0030
-#define BAM_IRQ_STTS			0x0014
-#define BAM_IRQ_CLR			0x0018
-#define BAM_IRQ_EN			0x001C
-#define BAM_CNFG_BITS			0x007C
-#define BAM_IRQ_SRCS_EE(ee)		(0x0800 + ((ee) * 0x80))
-#define BAM_IRQ_SRCS_MSK_EE(ee)		(0x0804 + ((ee) * 0x80))
-#define BAM_P_CTRL(pipe)		(0x1000 + ((pipe) * 0x1000))
-#define BAM_P_RST(pipe)			(0x1004 + ((pipe) * 0x1000))
-#define BAM_P_HALT(pipe)		(0x1008 + ((pipe) * 0x1000))
-#define BAM_P_IRQ_STTS(pipe)		(0x1010 + ((pipe) * 0x1000))
-#define BAM_P_IRQ_CLR(pipe)		(0x1014 + ((pipe) * 0x1000))
-#define BAM_P_IRQ_EN(pipe)		(0x1018 + ((pipe) * 0x1000))
-#define BAM_P_EVNT_DEST_ADDR(pipe)	(0x182C + ((pipe) * 0x1000))
-#define BAM_P_EVNT_REG(pipe)		(0x1818 + ((pipe) * 0x1000))
-#define BAM_P_SW_OFSTS(pipe)		(0x1800 + ((pipe) * 0x1000))
-#define BAM_P_DATA_FIFO_ADDR(pipe)	(0x1824 + ((pipe) * 0x1000))
-#define BAM_P_DESC_FIFO_ADDR(pipe)	(0x181C + ((pipe) * 0x1000))
-#define BAM_P_EVNT_TRSHLD(pipe)		(0x1828 + ((pipe) * 0x1000))
-#define BAM_P_FIFO_SIZES(pipe)		(0x1820 + ((pipe) * 0x1000))
+enum bam_reg {
+	BAM_CTRL,
+	BAM_REVISION,
+	BAM_NUM_PIPES,
+	BAM_DESC_CNT_TRSHLD,
+	BAM_IRQ_SRCS,
+	BAM_IRQ_SRCS_MSK,
+	BAM_IRQ_SRCS_UNMASKED,
+	BAM_IRQ_STTS,
+	BAM_IRQ_CLR,
+	BAM_IRQ_EN,
+	BAM_CNFG_BITS,
+	BAM_IRQ_SRCS_EE,
+	BAM_IRQ_SRCS_MSK_EE,
+	BAM_P_CTRL,
+	BAM_P_RST,
+	BAM_P_HALT,
+	BAM_P_IRQ_STTS,
+	BAM_P_IRQ_CLR,
+	BAM_P_IRQ_EN,
+	BAM_P_EVNT_DEST_ADDR,
+	BAM_P_EVNT_REG,
+	BAM_P_SW_OFSTS,
+	BAM_P_DATA_FIFO_ADDR,
+	BAM_P_DESC_FIFO_ADDR,
+	BAM_P_EVNT_GEN_TRSHLD,
+	BAM_P_FIFO_SIZES,
+};
+
+struct reg_offset_data {
+	u32 base_offset;
+	unsigned int pipe_mult, evnt_mult, ee_mult;
+};
+
+static const struct reg_offset_data bam_v1_3_reg_info[] = {
+	[BAM_CTRL]		= { 0x0F80, 0x00, 0x00, 0x00 },
+	[BAM_REVISION]		= { 0x0F84, 0x00, 0x00, 0x00 },
+	[BAM_NUM_PIPES]		= { 0x0FBC, 0x00, 0x00, 0x00 },
+	[BAM_DESC_CNT_TRSHLD]	= { 0x0F88, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS]		= { 0x0F8C, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_MSK]	= { 0x0F90, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_UNMASKED]	= { 0x0FB0, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_STTS]		= { 0x0F94, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_CLR]		= { 0x0F98, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_EN]		= { 0x0F9C, 0x00, 0x00, 0x00 },
+	[BAM_CNFG_BITS]		= { 0x0FFC, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_EE]	= { 0x1800, 0x00, 0x00, 0x80 },
+	[BAM_IRQ_SRCS_MSK_EE]	= { 0x1804, 0x00, 0x00, 0x80 },
+	[BAM_P_CTRL]		= { 0x0000, 0x80, 0x00, 0x00 },
+	[BAM_P_RST]		= { 0x0004, 0x80, 0x00, 0x00 },
+	[BAM_P_HALT]		= { 0x0008, 0x80, 0x00, 0x00 },
+	[BAM_P_IRQ_STTS]	= { 0x0010, 0x80, 0x00, 0x00 },
+	[BAM_P_IRQ_CLR]		= { 0x0014, 0x80, 0x00, 0x00 },
+	[BAM_P_IRQ_EN]		= { 0x0018, 0x80, 0x00, 0x00 },
+	[BAM_P_EVNT_DEST_ADDR]	= { 0x102C, 0x00, 0x40, 0x00 },
+	[BAM_P_EVNT_REG]	= { 0x1018, 0x00, 0x40, 0x00 },
+	[BAM_P_SW_OFSTS]	= { 0x1000, 0x00, 0x40, 0x00 },
+	[BAM_P_DATA_FIFO_ADDR]	= { 0x1024, 0x00, 0x40, 0x00 },
+	[BAM_P_DESC_FIFO_ADDR]	= { 0x101C, 0x00, 0x40, 0x00 },
+	[BAM_P_EVNT_GEN_TRSHLD]	= { 0x1028, 0x00, 0x40, 0x00 },
+	[BAM_P_FIFO_SIZES]	= { 0x1020, 0x00, 0x40, 0x00 },
+};
+
+static const struct reg_offset_data bam_v1_4_reg_info[] = {
+	[BAM_CTRL]		= { 0x0000, 0x00, 0x00, 0x00 },
+	[BAM_REVISION]		= { 0x0004, 0x00, 0x00, 0x00 },
+	[BAM_NUM_PIPES]		= { 0x003C, 0x00, 0x00, 0x00 },
+	[BAM_DESC_CNT_TRSHLD]	= { 0x0008, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS]		= { 0x000C, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_MSK]	= { 0x0010, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_UNMASKED]	= { 0x0030, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_STTS]		= { 0x0014, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_CLR]		= { 0x0018, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_EN]		= { 0x001C, 0x00, 0x00, 0x00 },
+	[BAM_CNFG_BITS]		= { 0x007C, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_EE]	= { 0x0800, 0x00, 0x00, 0x80 },
+	[BAM_IRQ_SRCS_MSK_EE]	= { 0x0804, 0x00, 0x00, 0x80 },
+	[BAM_P_CTRL]		= { 0x1000, 0x1000, 0x00, 0x00 },
+	[BAM_P_RST]		= { 0x1004, 0x1000, 0x00, 0x00 },
+	[BAM_P_HALT]		= { 0x1008, 0x1000, 0x00, 0x00 },
+	[BAM_P_IRQ_STTS]	= { 0x1010, 0x1000, 0x00, 0x00 },
+	[BAM_P_IRQ_CLR]		= { 0x1014, 0x1000, 0x00, 0x00 },
+	[BAM_P_IRQ_EN]		= { 0x1018, 0x1000, 0x00, 0x00 },
+	[BAM_P_EVNT_DEST_ADDR]	= { 0x102C, 0x00, 0x1000, 0x00 },
+	[BAM_P_EVNT_REG]	= { 0x1018, 0x00, 0x1000, 0x00 },
+	[BAM_P_SW_OFSTS]	= { 0x1000, 0x00, 0x1000, 0x00 },
+	[BAM_P_DATA_FIFO_ADDR]	= { 0x1824, 0x00, 0x1000, 0x00 },
+	[BAM_P_DESC_FIFO_ADDR]	= { 0x181C, 0x00, 0x1000, 0x00 },
+	[BAM_P_EVNT_GEN_TRSHLD]	= { 0x1828, 0x00, 0x1000, 0x00 },
+	[BAM_P_FIFO_SIZES]	= { 0x1820, 0x00, 0x1000, 0x00 },
+};
 
 /* BAM CTRL */
 #define BAM_SW_RST			BIT(0)
@@ -297,6 +359,8 @@
 	/* execution environment ID, from DT */
 	u32 ee;
 
+	const struct reg_offset_data *layout;
+
 	struct clk *bamclk;
 	int irq;
 
@@ -305,6 +369,23 @@
 };
 
 /**
+ * bam_addr - returns BAM register address
+ * @bdev: bam device
+ * @pipe: pipe instance (ignored when register doesn't have multiple instances)
+ * @reg:  register enum
+ */
+static inline void __iomem *bam_addr(struct bam_device *bdev, u32 pipe,
+		enum bam_reg reg)
+{
+	const struct reg_offset_data r = bdev->layout[reg];
+
+	return bdev->regs + r.base_offset +
+		r.pipe_mult * pipe +
+		r.evnt_mult * pipe +
+		r.ee_mult * bdev->ee;
+}
+
+/**
  * bam_reset_channel - Reset individual BAM DMA channel
  * @bchan: bam channel
  *
@@ -317,8 +398,8 @@
 	lockdep_assert_held(&bchan->vc.lock);
 
 	/* reset channel */
-	writel_relaxed(1, bdev->regs + BAM_P_RST(bchan->id));
-	writel_relaxed(0, bdev->regs + BAM_P_RST(bchan->id));
+	writel_relaxed(1, bam_addr(bdev, bchan->id, BAM_P_RST));
+	writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_RST));
 
 	/* don't allow cpu to reorder BAM register accesses done after this */
 	wmb();
@@ -347,17 +428,18 @@
 	 * because we allocated 1 more descriptor (8 bytes) than we can use
 	 */
 	writel_relaxed(ALIGN(bchan->fifo_phys, sizeof(struct bam_desc_hw)),
-			bdev->regs + BAM_P_DESC_FIFO_ADDR(bchan->id));
-	writel_relaxed(BAM_DESC_FIFO_SIZE, bdev->regs +
-			BAM_P_FIFO_SIZES(bchan->id));
+			bam_addr(bdev, bchan->id, BAM_P_DESC_FIFO_ADDR));
+	writel_relaxed(BAM_DESC_FIFO_SIZE,
+			bam_addr(bdev, bchan->id, BAM_P_FIFO_SIZES));
 
 	/* enable the per pipe interrupts, enable EOT, ERR, and INT irqs */
-	writel_relaxed(P_DEFAULT_IRQS_EN, bdev->regs + BAM_P_IRQ_EN(bchan->id));
+	writel_relaxed(P_DEFAULT_IRQS_EN,
+			bam_addr(bdev, bchan->id, BAM_P_IRQ_EN));
 
 	/* unmask the specific pipe and EE combo */
-	val = readl_relaxed(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
 	val |= BIT(bchan->id);
-	writel_relaxed(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
 
 	/* don't allow cpu to reorder the channel enable done below */
 	wmb();
@@ -367,7 +449,7 @@
 	if (dir == DMA_DEV_TO_MEM)
 		val |= P_DIRECTION;
 
-	writel_relaxed(val, bdev->regs + BAM_P_CTRL(bchan->id));
+	writel_relaxed(val, bam_addr(bdev, bchan->id, BAM_P_CTRL));
 
 	bchan->initialized = 1;
 
@@ -432,12 +514,12 @@
 	bchan->fifo_virt = NULL;
 
 	/* mask irq for pipe/channel */
-	val = readl_relaxed(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
 	val &= ~BIT(bchan->id);
-	writel_relaxed(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
 
 	/* disable irq */
-	writel_relaxed(0, bdev->regs + BAM_P_IRQ_EN(bchan->id));
+	writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_IRQ_EN));
 }
 
 /**
@@ -583,14 +665,14 @@
 	switch (cmd) {
 	case DMA_PAUSE:
 		spin_lock_irqsave(&bchan->vc.lock, flag);
-		writel_relaxed(1, bdev->regs + BAM_P_HALT(bchan->id));
+		writel_relaxed(1, bam_addr(bdev, bchan->id, BAM_P_HALT));
 		bchan->paused = 1;
 		spin_unlock_irqrestore(&bchan->vc.lock, flag);
 		break;
 
 	case DMA_RESUME:
 		spin_lock_irqsave(&bchan->vc.lock, flag);
-		writel_relaxed(0, bdev->regs + BAM_P_HALT(bchan->id));
+		writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_HALT));
 		bchan->paused = 0;
 		spin_unlock_irqrestore(&bchan->vc.lock, flag);
 		break;
@@ -626,7 +708,7 @@
 	unsigned long flags;
 	struct bam_async_desc *async_desc;
 
-	srcs = readl_relaxed(bdev->regs + BAM_IRQ_SRCS_EE(bdev->ee));
+	srcs = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_EE));
 
 	/* return early if no pipe/channel interrupts are present */
 	if (!(srcs & P_IRQ))
@@ -639,11 +721,9 @@
 			continue;
 
 		/* clear pipe irq */
-		pipe_stts = readl_relaxed(bdev->regs +
-			BAM_P_IRQ_STTS(i));
+		pipe_stts = readl_relaxed(bam_addr(bdev, i, BAM_P_IRQ_STTS));
 
-		writel_relaxed(pipe_stts, bdev->regs +
-				BAM_P_IRQ_CLR(i));
+		writel_relaxed(pipe_stts, bam_addr(bdev, i, BAM_P_IRQ_CLR));
 
 		spin_lock_irqsave(&bchan->vc.lock, flags);
 		async_desc = bchan->curr_txd;
@@ -694,12 +774,12 @@
 		tasklet_schedule(&bdev->task);
 
 	if (srcs & BAM_IRQ)
-		clr_mask = readl_relaxed(bdev->regs + BAM_IRQ_STTS);
+		clr_mask = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_STTS));
 
 	/* don't allow reorder of the various accesses to the BAM registers */
 	mb();
 
-	writel_relaxed(clr_mask, bdev->regs + BAM_IRQ_CLR);
+	writel_relaxed(clr_mask, bam_addr(bdev, 0, BAM_IRQ_CLR));
 
 	return IRQ_HANDLED;
 }
@@ -763,7 +843,7 @@
 	else
 		maxburst = bchan->slave.dst_maxburst;
 
-	writel_relaxed(maxburst, bdev->regs + BAM_DESC_CNT_TRSHLD);
+	writel_relaxed(maxburst, bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
 
 	bchan->reconfigure = 0;
 }
@@ -830,7 +910,7 @@
 	/* ensure descriptor writes and dma start not reordered */
 	wmb();
 	writel_relaxed(bchan->tail * sizeof(struct bam_desc_hw),
-			bdev->regs + BAM_P_EVNT_REG(bchan->id));
+			bam_addr(bdev, bchan->id, BAM_P_EVNT_REG));
 }
 
 /**
@@ -918,43 +998,44 @@
 	u32 val;
 
 	/* read revision and configuration information */
-	val = readl_relaxed(bdev->regs + BAM_REVISION) >> NUM_EES_SHIFT;
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)) >> NUM_EES_SHIFT;
 	val &= NUM_EES_MASK;
 
 	/* check that configured EE is within range */
 	if (bdev->ee >= val)
 		return -EINVAL;
 
-	val = readl_relaxed(bdev->regs + BAM_NUM_PIPES);
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES));
 	bdev->num_channels = val & BAM_NUM_PIPES_MASK;
 
 	/* s/w reset bam */
 	/* after reset all pipes are disabled and idle */
-	val = readl_relaxed(bdev->regs + BAM_CTRL);
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_CTRL));
 	val |= BAM_SW_RST;
-	writel_relaxed(val, bdev->regs + BAM_CTRL);
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
 	val &= ~BAM_SW_RST;
-	writel_relaxed(val, bdev->regs + BAM_CTRL);
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
 
 	/* make sure previous stores are visible before enabling BAM */
 	wmb();
 
 	/* enable bam */
 	val |= BAM_EN;
-	writel_relaxed(val, bdev->regs + BAM_CTRL);
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
 
 	/* set descriptor threshold, start with 4 bytes */
-	writel_relaxed(DEFAULT_CNT_THRSHLD, bdev->regs + BAM_DESC_CNT_TRSHLD);
+	writel_relaxed(DEFAULT_CNT_THRSHLD,
+			bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
 
 	/* Enable default set of h/w workarounds, ie all except BAM_FULL_PIPE */
-	writel_relaxed(BAM_CNFG_BITS_DEFAULT, bdev->regs + BAM_CNFG_BITS);
+	writel_relaxed(BAM_CNFG_BITS_DEFAULT, bam_addr(bdev, 0, BAM_CNFG_BITS));
 
 	/* enable irqs for errors */
 	writel_relaxed(BAM_ERROR_EN | BAM_HRESP_ERR_EN,
-				bdev->regs + BAM_IRQ_EN);
+			bam_addr(bdev, 0, BAM_IRQ_EN));
 
 	/* unmask global bam interrupt */
-	writel_relaxed(BAM_IRQ_MSK, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	writel_relaxed(BAM_IRQ_MSK, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
 
 	return 0;
 }
@@ -969,9 +1050,18 @@
 	bchan->vc.desc_free = bam_dma_free_desc;
 }
 
+static const struct of_device_id bam_of_match[] = {
+	{ .compatible = "qcom,bam-v1.3.0", .data = &bam_v1_3_reg_info },
+	{ .compatible = "qcom,bam-v1.4.0", .data = &bam_v1_4_reg_info },
+	{}
+};
+
+MODULE_DEVICE_TABLE(of, bam_of_match);
+
 static int bam_dma_probe(struct platform_device *pdev)
 {
 	struct bam_device *bdev;
+	const struct of_device_id *match;
 	struct resource *iores;
 	int ret, i;
 
@@ -981,6 +1071,14 @@
 
 	bdev->dev = &pdev->dev;
 
+	match = of_match_node(bam_of_match, pdev->dev.of_node);
+	if (!match) {
+		dev_err(&pdev->dev, "Unsupported BAM module\n");
+		return -ENODEV;
+	}
+
+	bdev->layout = match->data;
+
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	bdev->regs = devm_ioremap_resource(&pdev->dev, iores);
 	if (IS_ERR(bdev->regs))
@@ -1084,7 +1182,7 @@
 	dma_async_device_unregister(&bdev->common);
 
 	/* mask all interrupts for this execution environment */
-	writel_relaxed(0, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	writel_relaxed(0, bam_addr(bdev, 0,  BAM_IRQ_SRCS_MSK_EE));
 
 	devm_free_irq(bdev->dev, bdev->irq, bdev);
 
@@ -1104,18 +1202,11 @@
 	return 0;
 }
 
-static const struct of_device_id bam_of_match[] = {
-	{ .compatible = "qcom,bam-v1.4.0", },
-	{}
-};
-MODULE_DEVICE_TABLE(of, bam_of_match);
-
 static struct platform_driver bam_dma_driver = {
 	.probe = bam_dma_probe,
 	.remove = bam_dma_remove,
 	.driver = {
 		.name = "bam-dma-engine",
-		.owner = THIS_MODULE,
 		.of_match_table = bam_of_match,
 	},
 };
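
With the per-version register layout above, every BAM register is described by a base offset plus per-pipe, per-event and per-execution-environment multipliers, and bam_addr() folds those into one address. A worked example, using the v1.4 table values added above:

/* Worked example: where bam_addr(bdev, 3, BAM_P_RST) lands on a v1.4 BAM,
 * using the bam_v1_4_reg_info[] values added above. */
static void __iomem *example_p_rst_pipe3(struct bam_device *bdev)
{
	const struct reg_offset_data r = bam_v1_4_reg_info[BAM_P_RST];
	/* r = { 0x1004, 0x1000, 0x00, 0x00 } */

	return bdev->regs + r.base_offset +	/* 0x1004                      */
		r.pipe_mult * 3 +		/* + 3 * 0x1000                */
		r.evnt_mult * 3 +		/* + 0 (not an event register) */
		r.ee_mult * bdev->ee;		/* + 0 (not EE-banked)         */
	/* = bdev->regs + 0x4004, i.e. the old BAM_P_RST(3) = 0x1004 + 3 * 0x1000 */
}
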
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
index 7416572..6941a77 100644
--- a/drivers/dma/s3c24xx-dma.c
+++ b/drivers/dma/s3c24xx-dma.c
@@ -1402,7 +1402,6 @@
 static struct platform_driver s3c24xx_dma_driver = {
 	.driver		= {
 		.name	= "s3c24xx-dma",
-		.owner	= THIS_MODULE,
 	},
 	.id_table	= s3c24xx_dma_driver_ids,
 	.probe		= s3c24xx_dma_probe,
diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c
index 4b0ef04..2329d29 100644
--- a/drivers/dma/sa11x0-dma.c
+++ b/drivers/dma/sa11x0-dma.c
@@ -829,7 +829,6 @@
 {
 	unsigned i;
 
-	dmadev->chancnt = ARRAY_SIZE(chan_desc);
 	INIT_LIST_HEAD(&dmadev->channels);
 	dmadev->dev = dev;
 	dmadev->device_alloc_chan_resources = sa11x0_dma_alloc_chan_resources;
@@ -838,7 +837,7 @@
 	dmadev->device_tx_status = sa11x0_dma_tx_status;
 	dmadev->device_issue_pending = sa11x0_dma_issue_pending;
 
-	for (i = 0; i < dmadev->chancnt; i++) {
+	for (i = 0; i < ARRAY_SIZE(chan_desc); i++) {
 		struct sa11x0_dma_chan *c;
 
 		c = kzalloc(sizeof(*c), GFP_KERNEL);
diff --git a/drivers/dma/sh/rcar-audmapp.c b/drivers/dma/sh/rcar-audmapp.c
index 80fd2ae..d95bbdd 100644
--- a/drivers/dma/sh/rcar-audmapp.c
+++ b/drivers/dma/sh/rcar-audmapp.c
@@ -253,7 +253,6 @@
 
 static void audmapp_chan_remove(struct audmapp_device *audev)
 {
-	struct dma_device *dma_dev = &audev->shdma_dev.dma_dev;
 	struct shdma_chan *schan;
 	int i;
 
@@ -261,7 +260,6 @@
 		BUG_ON(!schan);
 		shdma_chan_remove(schan);
 	}
-	dma_dev->chancnt = 0;
 }
 
 static struct dma_chan *audmapp_of_xlate(struct of_phandle_args *dma_spec,
@@ -367,7 +365,6 @@
 	.probe		= audmapp_probe,
 	.remove		= audmapp_remove,
 	.driver		= {
-		.owner	= THIS_MODULE,
 		.name	= "rcar-audmapp-engine",
 		.of_match_table = audmapp_of_match,
 	},
diff --git a/drivers/dma/sh/rcar-hpbdma.c b/drivers/dma/sh/rcar-hpbdma.c
index b212d94..20a6f6f 100644
--- a/drivers/dma/sh/rcar-hpbdma.c
+++ b/drivers/dma/sh/rcar-hpbdma.c
@@ -619,7 +619,6 @@
 
 static void hpb_dmae_chan_remove(struct hpb_dmae_device *hpbdev)
 {
-	struct dma_device *dma_dev = &hpbdev->shdma_dev.dma_dev;
 	struct shdma_chan *schan;
 	int i;
 
@@ -628,7 +627,6 @@
 
 		shdma_chan_remove(schan);
 	}
-	dma_dev->chancnt = 0;
 }
 
 static int hpb_dmae_remove(struct platform_device *pdev)
@@ -655,7 +653,6 @@
 	.remove		= hpb_dmae_remove,
 	.shutdown	= hpb_dmae_shutdown,
 	.driver = {
-		.owner	= THIS_MODULE,
 		.name	= "hpb-dma-engine",
 	},
 };
diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
index 42d4974..3a2adb1 100644
--- a/drivers/dma/sh/shdma-base.c
+++ b/drivers/dma/sh/shdma-base.c
@@ -391,6 +391,8 @@
 				dev_dbg(schan->dev, "Bring down channel %d\n", schan->id);
 				pm_runtime_put(schan->dev);
 				schan->pm_state = SHDMA_PM_ESTABLISHED;
+			} else if (schan->pm_state == SHDMA_PM_PENDING) {
+				shdma_chan_xfer_ld_queue(schan);
 			}
 		}
 	}
@@ -951,7 +953,7 @@
 	/* Add the channel to DMA device channel list */
 	list_add_tail(&schan->dma_chan.device_node,
 			&sdev->dma_dev.channels);
-	sdev->schan[sdev->dma_dev.chancnt++] = schan;
+	sdev->schan[id] = schan;
 }
 EXPORT_SYMBOL(shdma_chan_probe);
 
diff --git a/drivers/dma/sh/shdma-of.c b/drivers/dma/sh/shdma-of.c
index b4ff9d3..f999f9b 100644
--- a/drivers/dma/sh/shdma-of.c
+++ b/drivers/dma/sh/shdma-of.c
@@ -66,7 +66,6 @@
 
 static struct platform_driver shdma_of = {
 	.driver		= {
-		.owner	= THIS_MODULE,
 		.name	= "shdma-of",
 		.of_match_table = shdma_of_match,
 	},
diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c
index 58eb857..b65317c 100644
--- a/drivers/dma/sh/shdmac.c
+++ b/drivers/dma/sh/shdmac.c
@@ -572,7 +572,6 @@
 
 static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
 {
-	struct dma_device *dma_dev = &shdev->shdma_dev.dma_dev;
 	struct shdma_chan *schan;
 	int i;
 
@@ -581,7 +580,6 @@
 
 		shdma_chan_remove(schan);
 	}
-	dma_dev->chancnt = 0;
 }
 
 static void sh_dmae_shutdown(struct platform_device *pdev)
diff --git a/drivers/dma/sh/sudmac.c b/drivers/dma/sh/sudmac.c
index 3ce1039..6da2eaa 100644
--- a/drivers/dma/sh/sudmac.c
+++ b/drivers/dma/sh/sudmac.c
@@ -295,7 +295,6 @@
 
 static void sudmac_chan_remove(struct sudmac_device *su_dev)
 {
-	struct dma_device *dma_dev = &su_dev->shdma_dev.dma_dev;
 	struct shdma_chan *schan;
 	int i;
 
@@ -304,7 +303,6 @@
 
 		shdma_chan_remove(schan);
 	}
-	dma_dev->chancnt = 0;
 }
 
 static dma_addr_t sudmac_slave_addr(struct shdma_chan *schan)
@@ -411,7 +409,6 @@
 
 static struct platform_driver sudmac_driver = {
 	.driver		= {
-		.owner	= THIS_MODULE,
 		.name	= SUDMAC_DRV_NAME,
 	},
 	.probe		= sudmac_probe,
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
index aac03ab..feb1e8a 100644
--- a/drivers/dma/sirf-dma.c
+++ b/drivers/dma/sirf-dma.c
@@ -735,7 +735,6 @@
 
 	dma = &sdma->dma;
 	dma->dev = dev;
-	dma->chancnt = SIRFSOC_DMA_CHANNELS;
 
 	dma->device_alloc_chan_resources = sirfsoc_dma_alloc_chan_resources;
 	dma->device_free_chan_resources = sirfsoc_dma_free_chan_resources;
@@ -752,7 +751,7 @@
 	dma_cap_set(DMA_INTERLEAVE, dma->cap_mask);
 	dma_cap_set(DMA_PRIVATE, dma->cap_mask);
 
-	for (i = 0; i < dma->chancnt; i++) {
+	for (i = 0; i < SIRFSOC_DMA_CHANNELS; i++) {
 		schan = &sdma->channels[i];
 
 		schan->chan.device = dma;
@@ -835,6 +834,7 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int sirfsoc_dma_pm_suspend(struct device *dev)
 {
 	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
@@ -916,6 +916,7 @@
 
 	return 0;
 }
+#endif
 
 static const struct dev_pm_ops sirfsoc_dma_pm_ops = {
 	SET_RUNTIME_PM_OPS(sirfsoc_dma_runtime_suspend, sirfsoc_dma_runtime_resume, NULL)
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index d9ca3e3..4d07106 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -3432,6 +3432,7 @@
 
 			d40_err(base->dev, "Failed to allocate %d pages.\n",
 				base->lcla_pool.pages);
+			ret = -ENOMEM;
 
 			for (j = 0; j < i; j++)
 				free_pages(page_list[j], base->lcla_pool.pages);
diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c
index 91292f5..159f173 100644
--- a/drivers/dma/sun6i-dma.c
+++ b/drivers/dma/sun6i-dma.c
@@ -18,6 +18,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of_dma.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/slab.h>
@@ -26,24 +27,6 @@
 #include "virt-dma.h"
 
 /*
- * There's 16 physical channels that can work in parallel.
- *
- * However we have 30 different endpoints for our requests.
- *
- * Since the channels are able to handle only an unidirectional
- * transfer, we need to allocate more virtual channels so that
- * everyone can grab one channel.
- *
- * Some devices can't work in both direction (mostly because it
- * wouldn't make sense), so we have a bit fewer virtual channels than
- * 2 channels per endpoints.
- */
-
-#define NR_MAX_CHANNELS		16
-#define NR_MAX_REQUESTS		30
-#define NR_MAX_VCHANS		53
-
-/*
  * Common registers
  */
 #define DMA_IRQ_EN(x)		((x) * 0x04)
@@ -60,6 +43,12 @@
 #define DMA_STAT		0x30
 
 /*
+ * sun8i specific registers
+ */
+#define SUN8I_DMA_GATE		0x20
+#define SUN8I_DMA_GATE_ENABLE	0x4
+
+/*
  * Channels specific registers
  */
 #define DMA_CHAN_ENABLE		0x00
@@ -102,6 +91,19 @@
 #define DRQ_SDRAM	1
 
 /*
+ * Hardware channels / ports representation
+ *
+ * The hardware is used in several SoCs, with differing numbers
+ * of channels and endpoints. This structure ties those numbers
+ * to a certain compatible string.
+ */
+struct sun6i_dma_config {
+	u32 nr_max_channels;
+	u32 nr_max_requests;
+	u32 nr_max_vchans;
+};
+
+/*
  * Hardware representation of the LLI
  *
  * The hardware will be fed the physical address of this structure,
@@ -159,6 +161,7 @@
 	struct dma_pool		*pool;
 	struct sun6i_pchan	*pchans;
 	struct sun6i_vchan	*vchans;
+	const struct sun6i_dma_config *cfg;
 };
 
 static struct device *chan2dev(struct dma_chan *chan)
@@ -426,6 +429,7 @@
 static void sun6i_dma_tasklet(unsigned long data)
 {
 	struct sun6i_dma_dev *sdev = (struct sun6i_dma_dev *)data;
+	const struct sun6i_dma_config *cfg = sdev->cfg;
 	struct sun6i_vchan *vchan;
 	struct sun6i_pchan *pchan;
 	unsigned int pchan_alloc = 0;
@@ -453,7 +457,7 @@
 	}
 
 	spin_lock_irq(&sdev->lock);
-	for (pchan_idx = 0; pchan_idx < NR_MAX_CHANNELS; pchan_idx++) {
+	for (pchan_idx = 0; pchan_idx < cfg->nr_max_channels; pchan_idx++) {
 		pchan = &sdev->pchans[pchan_idx];
 
 		if (pchan->vchan || list_empty(&sdev->pending))
@@ -474,7 +478,7 @@
 	}
 	spin_unlock_irq(&sdev->lock);
 
-	for (pchan_idx = 0; pchan_idx < NR_MAX_CHANNELS; pchan_idx++) {
+	for (pchan_idx = 0; pchan_idx < cfg->nr_max_channels; pchan_idx++) {
 		if (!(pchan_alloc & BIT(pchan_idx)))
 			continue;
 
@@ -496,7 +500,7 @@
 	int i, j, ret = IRQ_NONE;
 	u32 status;
 
-	for (i = 0; i < 2; i++) {
+	for (i = 0; i < sdev->cfg->nr_max_channels / DMA_IRQ_CHAN_NR; i++) {
 		status = readl(sdev->base + DMA_IRQ_STAT(i));
 		if (!status)
 			continue;
@@ -506,7 +510,7 @@
 
 		writel(status, sdev->base + DMA_IRQ_STAT(i));
 
-		for (j = 0; (j < 8) && status; j++) {
+		for (j = 0; (j < DMA_IRQ_CHAN_NR) && status; j++) {
 			if (status & DMA_IRQ_QUEUE) {
 				pchan = sdev->pchans + j;
 				vchan = pchan->vchan;
@@ -519,7 +523,7 @@
 				}
 			}
 
-			status = status >> 4;
+			status = status >> DMA_IRQ_CHAN_WIDTH;
 		}
 
 		if (!atomic_read(&sdev->tasklet_shutdown))
@@ -815,7 +819,7 @@
 	struct dma_chan *chan;
 	u8 port = dma_spec->args[0];
 
-	if (port > NR_MAX_REQUESTS)
+	if (port > sdev->cfg->nr_max_requests)
 		return NULL;
 
 	chan = dma_get_any_slave_channel(&sdev->slave);
@@ -848,7 +852,7 @@
 {
 	int i;
 
-	for (i = 0; i < NR_MAX_VCHANS; i++) {
+	for (i = 0; i < sdev->cfg->nr_max_vchans; i++) {
 		struct sun6i_vchan *vchan = &sdev->vchans[i];
 
 		list_del(&vchan->vc.chan.device_node);
@@ -856,8 +860,48 @@
 	}
 }
 
+/*
+ * For A31:
+ *
+ * There are 16 physical channels that can work in parallel.
+ *
+ * However, we have 30 different endpoints for our requests.
+ *
+ * Since each channel can only handle a unidirectional transfer,
+ * we need to allocate more virtual channels so that everyone can
+ * grab one.
+ *
+ * Some devices can't work in both directions (mostly because it
+ * wouldn't make sense), so we have a bit fewer virtual channels
+ * than 2 channels per endpoint.
+ */
+
+static struct sun6i_dma_config sun6i_a31_dma_cfg = {
+	.nr_max_channels = 16,
+	.nr_max_requests = 30,
+	.nr_max_vchans   = 53,
+};
+
+/*
+ * The A23 only has 8 physical channels, a maximum DRQ port id of 24,
+ * and a total of 37 usable source and destination endpoints.
+ */
+
+static struct sun6i_dma_config sun8i_a23_dma_cfg = {
+	.nr_max_channels = 8,
+	.nr_max_requests = 24,
+	.nr_max_vchans   = 37,
+};
+
+static struct of_device_id sun6i_dma_match[] = {
+	{ .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg },
+	{ .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg },
+	{ /* sentinel */ }
+};
+
 static int sun6i_dma_probe(struct platform_device *pdev)
 {
+	const struct of_device_id *device;
 	struct sun6i_dma_dev *sdc;
 	struct resource *res;
 	int ret, i;
@@ -866,6 +910,11 @@
 	if (!sdc)
 		return -ENOMEM;
 
+	device = of_match_device(sun6i_dma_match, &pdev->dev);
+	if (!device)
+		return -ENODEV;
+	sdc->cfg = device->data;
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	sdc->base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(sdc->base))
@@ -912,31 +961,30 @@
 	sdc->slave.device_prep_slave_sg		= sun6i_dma_prep_slave_sg;
 	sdc->slave.device_prep_dma_memcpy	= sun6i_dma_prep_dma_memcpy;
 	sdc->slave.device_control		= sun6i_dma_control;
-	sdc->slave.chancnt			= NR_MAX_VCHANS;
 	sdc->slave.copy_align			= 4;
 
 	sdc->slave.dev = &pdev->dev;
 
-	sdc->pchans = devm_kcalloc(&pdev->dev, NR_MAX_CHANNELS,
+	sdc->pchans = devm_kcalloc(&pdev->dev, sdc->cfg->nr_max_channels,
 				   sizeof(struct sun6i_pchan), GFP_KERNEL);
 	if (!sdc->pchans)
 		return -ENOMEM;
 
-	sdc->vchans = devm_kcalloc(&pdev->dev, NR_MAX_VCHANS,
+	sdc->vchans = devm_kcalloc(&pdev->dev, sdc->cfg->nr_max_vchans,
 				   sizeof(struct sun6i_vchan), GFP_KERNEL);
 	if (!sdc->vchans)
 		return -ENOMEM;
 
 	tasklet_init(&sdc->task, sun6i_dma_tasklet, (unsigned long)sdc);
 
-	for (i = 0; i < NR_MAX_CHANNELS; i++) {
+	for (i = 0; i < sdc->cfg->nr_max_channels; i++) {
 		struct sun6i_pchan *pchan = &sdc->pchans[i];
 
 		pchan->idx = i;
 		pchan->base = sdc->base + 0x100 + i * 0x40;
 	}
 
-	for (i = 0; i < NR_MAX_VCHANS; i++) {
+	for (i = 0; i < sdc->cfg->nr_max_vchans; i++) {
 		struct sun6i_vchan *vchan = &sdc->vchans[i];
 
 		INIT_LIST_HEAD(&vchan->node);
@@ -976,6 +1024,15 @@
 		goto err_dma_unregister;
 	}
 
+	/*
+	 * The sun8i variant requires us to toggle a DMA gating register,
+	 * as seen in Allwinner's SDK. This register is not documented
+	 * in the A23 user manual.
+	 */
+	if (of_device_is_compatible(pdev->dev.of_node,
+				    "allwinner,sun8i-a23-dma"))
+		writel(SUN8I_DMA_GATE_ENABLE, sdc->base + SUN8I_DMA_GATE);
+
 	return 0;
 
 err_dma_unregister:
@@ -1008,11 +1065,6 @@
 	return 0;
 }
 
-static struct of_device_id sun6i_dma_match[] = {
-	{ .compatible = "allwinner,sun6i-a31-dma" },
-	{ /* sentinel */ }
-};
-
 static struct platform_driver sun6i_dma_driver = {
 	.probe		= sun6i_dma_probe,
 	.remove		= sun6i_dma_remove,
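
The sun6i conversion replaces the compile-time NR_MAX_* constants with a per-SoC sun6i_dma_config looked up through the of_device_id .data pointer at probe time. Supporting a further SoC then only needs another config plus a compatible entry; a hypothetical sketch (the sun9i numbers below are made up, not from this patch):

static struct sun6i_dma_config sun9i_a80_dma_cfg = {	/* hypothetical values */
	.nr_max_channels = 16,
	.nr_max_requests = 24,
	.nr_max_vchans   = 40,
};

static struct of_device_id sun6i_dma_match[] = {
	{ .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg },
	{ .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg },
	{ .compatible = "allwinner,sun9i-a80-dma", .data = &sun9i_a80_dma_cfg },
	{ /* sentinel */ }
};
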
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index 1c867d03..d8450c3 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -1597,7 +1597,6 @@
 static struct platform_driver tegra_dmac_driver = {
 	.driver = {
 		.name	= "tegra-apbdma",
-		.owner = THIS_MODULE,
 		.pm	= &tegra_dma_dev_pm_ops,
 		.of_match_table = tegra_dma_of_match,
 	},
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index 4506a7b..2407ccf 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -783,7 +783,6 @@
 static struct platform_driver td_driver = {
 	.driver = {
 		.name	= DRIVER_NAME,
-		.owner  = THIS_MODULE,
 	},
 	.probe	= td_probe,
 	.remove	= td_remove,
diff --git a/drivers/dma/xilinx/xilinx_vdma.c b/drivers/dma/xilinx/xilinx_vdma.c
index a6e6476..4a3a8f3 100644
--- a/drivers/dma/xilinx/xilinx_vdma.c
+++ b/drivers/dma/xilinx/xilinx_vdma.c
@@ -942,6 +942,9 @@
 	if (!xt->numf || !xt->sgl[0].size)
 		return NULL;
 
+	if (xt->frame_size != 1)
+		return NULL;
+
 	/* Allocate a transaction descriptor. */
 	desc = xilinx_vdma_alloc_tx_descriptor(chan);
 	if (!desc)
@@ -960,7 +963,7 @@
 	hw = &segment->hw;
 	hw->vsize = xt->numf;
 	hw->hsize = xt->sgl[0].size;
-	hw->stride = xt->sgl[0].icg <<
+	hw->stride = (xt->sgl[0].icg + xt->sgl[0].size) <<
 			XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT;
 	hw->stride |= chan->config.frm_dly <<
 			XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT;
@@ -971,9 +974,11 @@
 		hw->buf_addr = xt->src_start;
 
 	/* Link the previous next descriptor to current */
-	prev = list_last_entry(&desc->segments,
-				struct xilinx_vdma_tx_segment, node);
-	prev->hw.next_desc = segment->phys;
+	if (!list_empty(&desc->segments)) {
+		prev = list_last_entry(&desc->segments,
+				       struct xilinx_vdma_tx_segment, node);
+		prev->hw.next_desc = segment->phys;
+	}
 
 	/* Insert the segment into the descriptor segments list. */
 	list_add_tail(&segment->node, &desc->segments);
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 3d42043..8c6363c 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -487,7 +487,7 @@
 	 * it. It should be fixed as of version 1.9, but to be on the safe side
 	 * we'll leave the limitation below for the 2.2.x tree.
 	 */
-	if (!strncmp((char *)&id[ATA_ID_PROD], "IOMEGA ZIP 100 ATAPI", 20)) {
+	if (strstarts((char *)&id[ATA_ID_PROD], "IOMEGA ZIP 100 ATAPI")) {
 		drive->atapi_flags |= IDE_AFLAG_ZIP_DRIVE;
 		/* This value will be visible in the /proc/ide/hdx/settings */
 		drive->pc_delay = IDEFLOPPY_PC_DELAY;
@@ -498,7 +498,7 @@
 	 * Guess what? The IOMEGA Clik! drive also needs the above fix. It makes
 	 * nasty clicking noises without it, so please don't remove this.
 	 */
-	if (strncmp((char *)&id[ATA_ID_PROD], "IOMEGA Clik!", 11) == 0) {
+	if (strstarts((char *)&id[ATA_ID_PROD], "IOMEGA Clik!")) {
 		blk_queue_max_hw_sectors(drive->queue, 64);
 		drive->atapi_flags |= IDE_AFLAG_CLIK_DRIVE;
 		/* IOMEGA Clik! drives do not support lock/unlock commands */
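
The strstarts() conversion removes the hand-counted prefix lengths, which had already drifted: "IOMEGA Clik!" is 12 characters, yet the old check compared only 11, so the trailing '!' was never tested. Roughly, the two forms below behave the same (set_clik_quirks() is a made-up helper, only there to anchor the example):

#include <linux/string.h>

if (strstarts((char *)&id[ATA_ID_PROD], "IOMEGA Clik!"))
	set_clik_quirks(drive);

/* ...is a strncmp() over the whole prefix length: */
if (strncmp((char *)&id[ATA_ID_PROD], "IOMEGA Clik!",
	    strlen("IOMEGA Clik!")) == 0)
	set_clik_quirks(drive);
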
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 2ce6268..e29b02c 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -101,8 +101,7 @@
 	struct device *host_dev = drive->hwif->host->dev[0];
 	struct module *module = host_dev ? host_dev->driver->owner : NULL;
 
-	if (module)
-		module_put(module);
+	module_put(module);
 #endif
 	put_device(&drive->gendev);
 }
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 6dbfbc2..30f0e61 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -144,13 +144,26 @@
 	select IOMMU_API
 
 config OMAP_IOMMU_DEBUG
-       tristate "Export OMAP IOMMU internals in DebugFS"
-       depends on OMAP_IOMMU && DEBUG_FS
-       help
-         Select this to see extensive information about
-         the internal state of OMAP IOMMU in debugfs.
+	bool "Export OMAP IOMMU internals in DebugFS"
+	depends on OMAP_IOMMU && DEBUG_FS
+	---help---
+	  Select this to see extensive information about
+	  the internal state of OMAP IOMMU in debugfs.
 
-         Say N unless you know you need this.
+	  Say N unless you know you need this.
+
+config ROCKCHIP_IOMMU
+	bool "Rockchip IOMMU Support"
+	depends on ARM
+	depends on ARCH_ROCKCHIP || COMPILE_TEST
+	select IOMMU_API
+	select ARM_DMA_USE_IOMMU
+	help
+	  Support for IOMMUs found on Rockchip rk32xx SOCs.
+	  These IOMMUs allow virtualization of the address space used by most
+	  cores within the multimedia subsystem.
+	  Say Y here if you are using a Rockchip SoC that includes an IOMMU
+	  device.
 
 config TEGRA_IOMMU_GART
 	bool "Tegra GART IOMMU Support"
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 16edef7..7b976f2 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -11,8 +11,8 @@
 obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
 obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
 obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
-obj-$(CONFIG_OMAP_IOMMU) += omap-iommu2.o
 obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
+obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
 obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
 obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
 obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 2d84c9e..b205f76 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3411,6 +3411,8 @@
 		return true;
 	case IOMMU_CAP_INTR_REMAP:
 		return (irq_remapping_enabled == 1);
+	case IOMMU_CAP_NOEXEC:
+		return false;
 	}
 
 	return false;
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 9c0d6e2..bea878f 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -279,10 +279,8 @@
 
 static void put_pasid_state(struct pasid_state *pasid_state)
 {
-	if (atomic_dec_and_test(&pasid_state->count)) {
-		put_device_state(pasid_state->device_state);
+	if (atomic_dec_and_test(&pasid_state->count))
 		wake_up(&pasid_state->wq);
-	}
 }
 
 static void put_pasid_state_wait(struct pasid_state *pasid_state)
@@ -291,9 +289,7 @@
 
 	prepare_to_wait(&pasid_state->wq, &wait, TASK_UNINTERRUPTIBLE);
 
-	if (atomic_dec_and_test(&pasid_state->count))
-		put_device_state(pasid_state->device_state);
-	else
+	if (!atomic_dec_and_test(&pasid_state->count))
 		schedule();
 
 	finish_wait(&pasid_state->wq, &wait);
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index e393ae0..b8aac13 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -404,9 +404,16 @@
 #define ARM_SMMU_CB_ASID(cfg)		((cfg)->cbndx)
 #define ARM_SMMU_CB_VMID(cfg)		((cfg)->cbndx + 1)
 
+enum arm_smmu_domain_stage {
+	ARM_SMMU_DOMAIN_S1 = 0,
+	ARM_SMMU_DOMAIN_S2,
+	ARM_SMMU_DOMAIN_NESTED,
+};
+
 struct arm_smmu_domain {
 	struct arm_smmu_device		*smmu;
 	struct arm_smmu_cfg		cfg;
+	enum arm_smmu_domain_stage	stage;
 	spinlock_t			lock;
 };
 
@@ -906,19 +913,46 @@
 	if (smmu_domain->smmu)
 		goto out_unlock;
 
-	if (smmu->features & ARM_SMMU_FEAT_TRANS_NESTED) {
+	/*
+	 * Mapping the requested stage onto what we support is surprisingly
+	 * complicated, mainly because the spec allows S1+S2 SMMUs without
+	 * support for nested translation. That means we end up with the
+	 * following table:
+	 *
+	 * Requested        Supported        Actual
+	 *     S1               N              S1
+	 *     S1             S1+S2            S1
+	 *     S1               S2             S2
+	 *     S1               S1             S1
+	 *     N                N              N
+	 *     N              S1+S2            S2
+	 *     N                S2             S2
+	 *     N                S1             S1
+	 *
+	 * Note that you can't actually request stage-2 mappings.
+	 */
+	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
+		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
+	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
+		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+
+	switch (smmu_domain->stage) {
+	case ARM_SMMU_DOMAIN_S1:
+		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
+		start = smmu->num_s2_context_banks;
+		break;
+	case ARM_SMMU_DOMAIN_NESTED:
 		/*
 		 * We will likely want to change this if/when KVM gets
 		 * involved.
 		 */
-		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
-		start = smmu->num_s2_context_banks;
-	} else if (smmu->features & ARM_SMMU_FEAT_TRANS_S1) {
-		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
-		start = smmu->num_s2_context_banks;
-	} else {
+	case ARM_SMMU_DOMAIN_S2:
 		cfg->cbar = CBAR_TYPE_S2_TRANS;
 		start = 0;
+		break;
+	default:
+		ret = -EINVAL;
+		goto out_unlock;
 	}
 
 	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
@@ -1281,7 +1315,7 @@
 				   unsigned long pfn, int prot, int stage)
 {
 	pte_t *pte, *start;
-	pteval_t pteval = ARM_SMMU_PTE_PAGE | ARM_SMMU_PTE_AF | ARM_SMMU_PTE_XN;
+	pteval_t pteval = ARM_SMMU_PTE_PAGE | ARM_SMMU_PTE_AF;
 
 	if (pmd_none(*pmd)) {
 		/* Allocate a new set of tables */
@@ -1315,10 +1349,11 @@
 			pteval |= ARM_SMMU_PTE_MEMATTR_NC;
 	}
 
+	if (prot & IOMMU_NOEXEC)
+		pteval |= ARM_SMMU_PTE_XN;
+
 	/* If no access, create a faulting entry to avoid TLB fills */
-	if (prot & IOMMU_EXEC)
-		pteval &= ~ARM_SMMU_PTE_XN;
-	else if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
+	if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
 		pteval &= ~ARM_SMMU_PTE_PAGE;
 
 	pteval |= ARM_SMMU_PTE_SH_IS;
@@ -1568,6 +1603,8 @@
 		return true;
 	case IOMMU_CAP_INTR_REMAP:
 		return true; /* MSIs are just memory writes */
+	case IOMMU_CAP_NOEXEC:
+		return true;
 	default:
 		return false;
 	}
@@ -1644,21 +1681,57 @@
 	iommu_group_remove_device(dev);
 }
 
+static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
+				    enum iommu_attr attr, void *data)
+{
+	struct arm_smmu_domain *smmu_domain = domain->priv;
+
+	switch (attr) {
+	case DOMAIN_ATTR_NESTING:
+		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+		return 0;
+	default:
+		return -ENODEV;
+	}
+}
+
+static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
+				    enum iommu_attr attr, void *data)
+{
+	struct arm_smmu_domain *smmu_domain = domain->priv;
+
+	switch (attr) {
+	case DOMAIN_ATTR_NESTING:
+		if (smmu_domain->smmu)
+			return -EPERM;
+		if (*(int *)data)
+			smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+		else
+			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+
+		return 0;
+	default:
+		return -ENODEV;
+	}
+}
+
 static const struct iommu_ops arm_smmu_ops = {
-	.capable	= arm_smmu_capable,
-	.domain_init	= arm_smmu_domain_init,
-	.domain_destroy	= arm_smmu_domain_destroy,
-	.attach_dev	= arm_smmu_attach_dev,
-	.detach_dev	= arm_smmu_detach_dev,
-	.map		= arm_smmu_map,
-	.unmap		= arm_smmu_unmap,
-	.map_sg		= default_iommu_map_sg,
-	.iova_to_phys	= arm_smmu_iova_to_phys,
-	.add_device	= arm_smmu_add_device,
-	.remove_device	= arm_smmu_remove_device,
-	.pgsize_bitmap	= (SECTION_SIZE |
-			   ARM_SMMU_PTE_CONT_SIZE |
-			   PAGE_SIZE),
+	.capable		= arm_smmu_capable,
+	.domain_init		= arm_smmu_domain_init,
+	.domain_destroy		= arm_smmu_domain_destroy,
+	.attach_dev		= arm_smmu_attach_dev,
+	.detach_dev		= arm_smmu_detach_dev,
+	.map			= arm_smmu_map,
+	.unmap			= arm_smmu_unmap,
+	.map_sg			= default_iommu_map_sg,
+	.iova_to_phys		= arm_smmu_iova_to_phys,
+	.add_device		= arm_smmu_add_device,
+	.remove_device		= arm_smmu_remove_device,
+	.domain_get_attr	= arm_smmu_domain_get_attr,
+	.domain_set_attr	= arm_smmu_domain_set_attr,
+	.pgsize_bitmap		= (SECTION_SIZE |
+				   ARM_SMMU_PTE_CONT_SIZE |
+				   PAGE_SIZE),
 };
 
 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
@@ -2073,8 +2146,20 @@
 
 static int __init arm_smmu_init(void)
 {
+	struct device_node *np;
 	int ret;
 
+	/*
+	 * Play nice with systems that don't have an ARM SMMU by checking that
+	 * an ARM SMMU exists in the system before proceeding with the driver
+	 * and IOMMU bus operation registration.
+	 */
+	np = of_find_matching_node(NULL, arm_smmu_of_match);
+	if (!np)
+		return 0;
+
+	of_node_put(np);
+
 	ret = platform_driver_register(&arm_smmu_driver);
 	if (ret)
 		return ret;
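
The stage selection above first clamps the requested stage against what the SMMU actually implements, and only then lets the switch pick a context bank type, per the table in the comment. Pulled out as a standalone helper (not part of the patch), the clamping step is just:

static enum arm_smmu_domain_stage
arm_smmu_pick_stage(enum arm_smmu_domain_stage requested, u32 features)
{
	enum arm_smmu_domain_stage stage = requested;

	if (!(features & ARM_SMMU_FEAT_TRANS_S1))
		stage = ARM_SMMU_DOMAIN_S2;	/* no stage 1: fall back to S2 */
	if (!(features & ARM_SMMU_FEAT_TRANS_S2))
		stage = ARM_SMMU_DOMAIN_S1;	/* no stage 2: fall back to S1 */

	return stage;		/* NESTED survives only if both stages are present */
}
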
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index c5c61ca..9847613 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -44,6 +44,14 @@
 
 #include "irq_remapping.h"
 
+typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
+struct dmar_res_callback {
+	dmar_res_handler_t	cb[ACPI_DMAR_TYPE_RESERVED];
+	void			*arg[ACPI_DMAR_TYPE_RESERVED];
+	bool			ignore_unhandled;
+	bool			print_entry;
+};
+
 /*
  * Assumptions:
  * 1) The hotplug framework guarantees that the DMAR unit will be hot-added
@@ -62,11 +70,12 @@
 struct acpi_table_header * __initdata dmar_tbl;
 static acpi_size dmar_tbl_size;
 static int dmar_dev_scope_status = 1;
+static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
 
 static int alloc_iommu(struct dmar_drhd_unit *drhd);
 static void free_iommu(struct intel_iommu *iommu);
 
-static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
+static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
 {
 	/*
 	 * add INCLUDE_ALL at the tail, so scan the list will find it at
@@ -344,24 +353,45 @@
 	.priority = INT_MIN,
 };
 
+static struct dmar_drhd_unit *
+dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
+{
+	struct dmar_drhd_unit *dmaru;
+
+	list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list)
+		if (dmaru->segment == drhd->segment &&
+		    dmaru->reg_base_addr == drhd->address)
+			return dmaru;
+
+	return NULL;
+}
+
 /**
  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
  * structure which uniquely represent one DMA remapping hardware unit
  * present in the platform
  */
-static int __init
-dmar_parse_one_drhd(struct acpi_dmar_header *header)
+static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
 {
 	struct acpi_dmar_hardware_unit *drhd;
 	struct dmar_drhd_unit *dmaru;
 	int ret = 0;
 
 	drhd = (struct acpi_dmar_hardware_unit *)header;
-	dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
+	dmaru = dmar_find_dmaru(drhd);
+	if (dmaru)
+		goto out;
+
+	dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
 	if (!dmaru)
 		return -ENOMEM;
 
-	dmaru->hdr = header;
+	/*
+	 * If the header was allocated from the slab by an ACPI _DSM method, we
+	 * need to copy its contents because the memory buffer will be freed on
+	 * return.
+	 */
+	dmaru->hdr = (void *)(dmaru + 1);
+	memcpy(dmaru->hdr, header, header->length);
 	dmaru->reg_base_addr = drhd->address;
 	dmaru->segment = drhd->segment;
 	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
@@ -381,6 +411,11 @@
 		return ret;
 	}
 	dmar_register_drhd_unit(dmaru);
+
+out:
+	if (arg)
+		(*(int *)arg)++;
+
 	return 0;
 }
 
@@ -393,7 +428,8 @@
 	kfree(dmaru);
 }
 
-static int __init dmar_parse_one_andd(struct acpi_dmar_header *header)
+static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
+				      void *arg)
 {
 	struct acpi_dmar_andd *andd = (void *)header;
 
@@ -414,8 +450,7 @@
 }
 
 #ifdef CONFIG_ACPI_NUMA
-static int __init
-dmar_parse_one_rhsa(struct acpi_dmar_header *header)
+static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
 {
 	struct acpi_dmar_rhsa *rhsa;
 	struct dmar_drhd_unit *drhd;
@@ -442,6 +477,8 @@
 
 	return 0;
 }
+#else
+#define	dmar_parse_one_rhsa		dmar_res_noop
 #endif
 
 static void __init
@@ -503,6 +540,52 @@
 	return (ACPI_SUCCESS(status) ? 1 : 0);
 }
 
+static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
+				       size_t len, struct dmar_res_callback *cb)
+{
+	int ret = 0;
+	struct acpi_dmar_header *iter, *next;
+	struct acpi_dmar_header *end = ((void *)start) + len;
+
+	for (iter = start; iter < end && ret == 0; iter = next) {
+		next = (void *)iter + iter->length;
+		if (iter->length == 0) {
+			/* Avoid looping forever on bad ACPI tables */
+			pr_debug(FW_BUG "Invalid 0-length structure\n");
+			break;
+		} else if (next > end) {
+			/* Avoid walking past the end of the table */
+			pr_warn(FW_BUG "record passes table end\n");
+			ret = -EINVAL;
+			break;
+		}
+
+		if (cb->print_entry)
+			dmar_table_print_dmar_entry(iter);
+
+		if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
+			/* continue for forward compatibility */
+			pr_debug("Unknown DMAR structure type %d\n",
+				 iter->type);
+		} else if (cb->cb[iter->type]) {
+			ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
+		} else if (!cb->ignore_unhandled) {
+			pr_warn("No handler for DMAR structure type %d\n",
+				iter->type);
+			ret = -EINVAL;
+		}
+	}
+
+	return ret;
+}
+
+static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
+				       struct dmar_res_callback *cb)
+{
+	return dmar_walk_remapping_entries((void *)(dmar + 1),
+			dmar->header.length - sizeof(*dmar), cb);
+}
+
 /**
  * parse_dmar_table - parses the DMA reporting table
  */
@@ -510,9 +593,18 @@
 parse_dmar_table(void)
 {
 	struct acpi_table_dmar *dmar;
-	struct acpi_dmar_header *entry_header;
 	int ret = 0;
 	int drhd_count = 0;
+	struct dmar_res_callback cb = {
+		.print_entry = true,
+		.ignore_unhandled = true,
+		.arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
+		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
+		.cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
+		.cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
+		.cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
+		.cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
+	};
 
 	/*
 	 * Do it again, earlier dmar_tbl mapping could be mapped with
@@ -536,51 +628,10 @@
 	}
 
 	pr_info("Host address width %d\n", dmar->width + 1);
-
-	entry_header = (struct acpi_dmar_header *)(dmar + 1);
-	while (((unsigned long)entry_header) <
-			(((unsigned long)dmar) + dmar_tbl->length)) {
-		/* Avoid looping forever on bad ACPI tables */
-		if (entry_header->length == 0) {
-			pr_warn("Invalid 0-length structure\n");
-			ret = -EINVAL;
-			break;
-		}
-
-		dmar_table_print_dmar_entry(entry_header);
-
-		switch (entry_header->type) {
-		case ACPI_DMAR_TYPE_HARDWARE_UNIT:
-			drhd_count++;
-			ret = dmar_parse_one_drhd(entry_header);
-			break;
-		case ACPI_DMAR_TYPE_RESERVED_MEMORY:
-			ret = dmar_parse_one_rmrr(entry_header);
-			break;
-		case ACPI_DMAR_TYPE_ROOT_ATS:
-			ret = dmar_parse_one_atsr(entry_header);
-			break;
-		case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
-#ifdef CONFIG_ACPI_NUMA
-			ret = dmar_parse_one_rhsa(entry_header);
-#endif
-			break;
-		case ACPI_DMAR_TYPE_NAMESPACE:
-			ret = dmar_parse_one_andd(entry_header);
-			break;
-		default:
-			pr_warn("Unknown DMAR structure type %d\n",
-				entry_header->type);
-			ret = 0; /* for forward compatibility */
-			break;
-		}
-		if (ret)
-			break;
-
-		entry_header = ((void *)entry_header + entry_header->length);
-	}
-	if (drhd_count == 0)
+	ret = dmar_walk_dmar_table(dmar, &cb);
+	if (ret == 0 && drhd_count == 0)
 		pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
+
 	return ret;
 }
 
@@ -778,76 +829,68 @@
 		dmi_get_system_info(DMI_PRODUCT_VERSION));
 }
 
-static int __init check_zero_address(void)
+static int __ref
+dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
 {
-	struct acpi_table_dmar *dmar;
-	struct acpi_dmar_header *entry_header;
 	struct acpi_dmar_hardware_unit *drhd;
+	void __iomem *addr;
+	u64 cap, ecap;
 
-	dmar = (struct acpi_table_dmar *)dmar_tbl;
-	entry_header = (struct acpi_dmar_header *)(dmar + 1);
-
-	while (((unsigned long)entry_header) <
-			(((unsigned long)dmar) + dmar_tbl->length)) {
-		/* Avoid looping forever on bad ACPI tables */
-		if (entry_header->length == 0) {
-			pr_warn("Invalid 0-length structure\n");
-			return 0;
-		}
-
-		if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
-			void __iomem *addr;
-			u64 cap, ecap;
-
-			drhd = (void *)entry_header;
-			if (!drhd->address) {
-				warn_invalid_dmar(0, "");
-				goto failed;
-			}
-
-			addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
-			if (!addr ) {
-				printk("IOMMU: can't validate: %llx\n", drhd->address);
-				goto failed;
-			}
-			cap = dmar_readq(addr + DMAR_CAP_REG);
-			ecap = dmar_readq(addr + DMAR_ECAP_REG);
-			early_iounmap(addr, VTD_PAGE_SIZE);
-			if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
-				warn_invalid_dmar(drhd->address,
-						  " returns all ones");
-				goto failed;
-			}
-		}
-
-		entry_header = ((void *)entry_header + entry_header->length);
+	drhd = (void *)entry;
+	if (!drhd->address) {
+		warn_invalid_dmar(0, "");
+		return -EINVAL;
 	}
-	return 1;
 
-failed:
+	if (arg)
+		addr = ioremap(drhd->address, VTD_PAGE_SIZE);
+	else
+		addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
+	if (!addr) {
+		pr_warn("IOMMU: can't validate: %llx\n", drhd->address);
+		return -EINVAL;
+	}
+
+	cap = dmar_readq(addr + DMAR_CAP_REG);
+	ecap = dmar_readq(addr + DMAR_ECAP_REG);
+
+	if (arg)
+		iounmap(addr);
+	else
+		early_iounmap(addr, VTD_PAGE_SIZE);
+
+	if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
+		warn_invalid_dmar(drhd->address, " returns all ones");
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
 int __init detect_intel_iommu(void)
 {
 	int ret;
+	struct dmar_res_callback validate_drhd_cb = {
+		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
+		.ignore_unhandled = true,
+	};
 
 	down_write(&dmar_global_lock);
 	ret = dmar_table_detect();
 	if (ret)
-		ret = check_zero_address();
-	{
-		if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
-			iommu_detected = 1;
-			/* Make sure ACS will be enabled */
-			pci_request_acs();
-		}
+		ret = !dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
+					    &validate_drhd_cb);
+	if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
+		iommu_detected = 1;
+		/* Make sure ACS will be enabled */
+		pci_request_acs();
+	}
 
 #ifdef CONFIG_X86
-		if (ret)
-			x86_init.iommu.iommu_init = intel_iommu_init;
+	if (ret)
+		x86_init.iommu.iommu_init = intel_iommu_init;
 #endif
-	}
+
 	early_acpi_os_unmap_memory((void __iomem *)dmar_tbl, dmar_tbl_size);
 	dmar_tbl = NULL;
 	up_write(&dmar_global_lock);
@@ -931,11 +974,32 @@
 	return err;
 }
 
+static int dmar_alloc_seq_id(struct intel_iommu *iommu)
+{
+	iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
+					    DMAR_UNITS_SUPPORTED);
+	if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
+		iommu->seq_id = -1;
+	} else {
+		set_bit(iommu->seq_id, dmar_seq_ids);
+		sprintf(iommu->name, "dmar%d", iommu->seq_id);
+	}
+
+	return iommu->seq_id;
+}
+
+static void dmar_free_seq_id(struct intel_iommu *iommu)
+{
+	if (iommu->seq_id >= 0) {
+		clear_bit(iommu->seq_id, dmar_seq_ids);
+		iommu->seq_id = -1;
+	}
+}
+
 static int alloc_iommu(struct dmar_drhd_unit *drhd)
 {
 	struct intel_iommu *iommu;
 	u32 ver, sts;
-	static int iommu_allocated = 0;
 	int agaw = 0;
 	int msagaw = 0;
 	int err;
@@ -949,13 +1013,16 @@
 	if (!iommu)
 		return -ENOMEM;
 
-	iommu->seq_id = iommu_allocated++;
-	sprintf (iommu->name, "dmar%d", iommu->seq_id);
+	if (dmar_alloc_seq_id(iommu) < 0) {
+		pr_err("IOMMU: failed to allocate seq_id\n");
+		err = -ENOSPC;
+		goto error;
+	}
 
 	err = map_iommu(iommu, drhd->reg_base_addr);
 	if (err) {
 		pr_err("IOMMU: failed to map %s\n", iommu->name);
-		goto error;
+		goto error_free_seq_id;
 	}
 
 	err = -EINVAL;
@@ -1005,9 +1072,11 @@
 
 	return 0;
 
- err_unmap:
+err_unmap:
 	unmap_iommu(iommu);
- error:
+error_free_seq_id:
+	dmar_free_seq_id(iommu);
+error:
 	kfree(iommu);
 	return err;
 }
@@ -1031,6 +1100,7 @@
 	if (iommu->reg)
 		unmap_iommu(iommu);
 
+	dmar_free_seq_id(iommu);
 	kfree(iommu);
 }
 
@@ -1661,12 +1731,17 @@
 	return dmar->flags & 0x1;
 }
 
+/* Check whether DMAR units are in use */
+static inline bool dmar_in_use(void)
+{
+	return irq_remapping_enabled || intel_iommu_enabled;
+}
+
 static int __init dmar_free_unused_resources(void)
 {
 	struct dmar_drhd_unit *dmaru, *dmaru_n;
 
-	/* DMAR units are in use */
-	if (irq_remapping_enabled || intel_iommu_enabled)
+	if (dmar_in_use())
 		return 0;
 
 	if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
@@ -1684,3 +1759,242 @@
 
 late_initcall(dmar_free_unused_resources);
 IOMMU_INIT_POST(detect_intel_iommu);
+
+/*
+ * DMAR Hotplug Support
+ * For more details, please refer to Intel(R) Virtualization Technology
+ * for Directed-IO Architecture Specification, Rev 2.2, Section 8.8
+ * "Remapping Hardware Unit Hot Plug".
+ */
+static u8 dmar_hp_uuid[] = {
+	/* 0000 */    0xA6, 0xA3, 0xC1, 0xD8, 0x9B, 0xBE, 0x9B, 0x4C,
+	/* 0008 */    0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF
+};
+
+/*
+ * Currently there is only one revision and the BIOS will not check the
+ * revision id, so use 0 for safety.
+ */
+#define	DMAR_DSM_REV_ID			0
+#define	DMAR_DSM_FUNC_DRHD		1
+#define	DMAR_DSM_FUNC_ATSR		2
+#define	DMAR_DSM_FUNC_RHSA		3
+
+static inline bool dmar_detect_dsm(acpi_handle handle, int func)
+{
+	return acpi_check_dsm(handle, dmar_hp_uuid, DMAR_DSM_REV_ID, 1 << func);
+}
+
+static int dmar_walk_dsm_resource(acpi_handle handle, int func,
+				  dmar_res_handler_t handler, void *arg)
+{
+	int ret = -ENODEV;
+	union acpi_object *obj;
+	struct acpi_dmar_header *start;
+	struct dmar_res_callback callback;
+	static int res_type[] = {
+		[DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
+		[DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
+		[DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
+	};
+
+	if (!dmar_detect_dsm(handle, func))
+		return 0;
+
+	obj = acpi_evaluate_dsm_typed(handle, dmar_hp_uuid, DMAR_DSM_REV_ID,
+				      func, NULL, ACPI_TYPE_BUFFER);
+	if (!obj)
+		return -ENODEV;
+
+	memset(&callback, 0, sizeof(callback));
+	callback.cb[res_type[func]] = handler;
+	callback.arg[res_type[func]] = arg;
+	start = (struct acpi_dmar_header *)obj->buffer.pointer;
+	ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
+
+	ACPI_FREE(obj);
+
+	return ret;
+}
+
+static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
+{
+	int ret;
+	struct dmar_drhd_unit *dmaru;
+
+	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
+	if (!dmaru)
+		return -ENODEV;
+
+	ret = dmar_ir_hotplug(dmaru, true);
+	if (ret == 0)
+		ret = dmar_iommu_hotplug(dmaru, true);
+
+	return ret;
+}
+
+static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
+{
+	int i, ret;
+	struct device *dev;
+	struct dmar_drhd_unit *dmaru;
+
+	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
+	if (!dmaru)
+		return 0;
+
+	/*
+	 * All PCI devices managed by this unit should have been destroyed.
+	 */
+	if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt)
+		for_each_active_dev_scope(dmaru->devices,
+					  dmaru->devices_cnt, i, dev)
+			return -EBUSY;
+
+	ret = dmar_ir_hotplug(dmaru, false);
+	if (ret == 0)
+		ret = dmar_iommu_hotplug(dmaru, false);
+
+	return ret;
+}
+
+static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
+{
+	struct dmar_drhd_unit *dmaru;
+
+	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
+	if (dmaru) {
+		list_del_rcu(&dmaru->list);
+		synchronize_rcu();
+		dmar_free_drhd(dmaru);
+	}
+
+	return 0;
+}
+
+static int dmar_hotplug_insert(acpi_handle handle)
+{
+	int ret;
+	int drhd_count = 0;
+
+	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
+				     &dmar_validate_one_drhd, (void *)1);
+	if (ret)
+		goto out;
+
+	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
+				     &dmar_parse_one_drhd, (void *)&drhd_count);
+	if (ret == 0 && drhd_count == 0) {
+		pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
+		goto out;
+	} else if (ret) {
+		goto release_drhd;
+	}
+
+	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
+				     &dmar_parse_one_rhsa, NULL);
+	if (ret)
+		goto release_drhd;
+
+	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
+				     &dmar_parse_one_atsr, NULL);
+	if (ret)
+		goto release_atsr;
+
+	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
+				     &dmar_hp_add_drhd, NULL);
+	if (!ret)
+		return 0;
+
+	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
+			       &dmar_hp_remove_drhd, NULL);
+release_atsr:
+	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
+			       &dmar_release_one_atsr, NULL);
+release_drhd:
+	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
+			       &dmar_hp_release_drhd, NULL);
+out:
+	return ret;
+}
+
+static int dmar_hotplug_remove(acpi_handle handle)
+{
+	int ret;
+
+	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
+				     &dmar_check_one_atsr, NULL);
+	if (ret)
+		return ret;
+
+	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
+				     &dmar_hp_remove_drhd, NULL);
+	if (ret == 0) {
+		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
+					       &dmar_release_one_atsr, NULL));
+		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
+					       &dmar_hp_release_drhd, NULL));
+	} else {
+		dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
+				       &dmar_hp_add_drhd, NULL);
+	}
+
+	return ret;
+}
+
+static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
+				       void *context, void **retval)
+{
+	acpi_handle *phdl = retval;
+
+	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
+		*phdl = handle;
+		return AE_CTRL_TERMINATE;
+	}
+
+	return AE_OK;
+}
+
+static int dmar_device_hotplug(acpi_handle handle, bool insert)
+{
+	int ret;
+	acpi_handle tmp = NULL;
+	acpi_status status;
+
+	if (!dmar_in_use())
+		return 0;
+
+	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
+		tmp = handle;
+	} else {
+		status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
+					     ACPI_UINT32_MAX,
+					     dmar_get_dsm_handle,
+					     NULL, NULL, &tmp);
+		if (ACPI_FAILURE(status)) {
+			pr_warn("Failed to locate _DSM method.\n");
+			return -ENXIO;
+		}
+	}
+	if (tmp == NULL)
+		return 0;
+
+	down_write(&dmar_global_lock);
+	if (insert)
+		ret = dmar_hotplug_insert(tmp);
+	else
+		ret = dmar_hotplug_remove(tmp);
+	up_write(&dmar_global_lock);
+
+	return ret;
+}
+
+int dmar_device_add(acpi_handle handle)
+{
+	return dmar_device_hotplug(handle, true);
+}
+
+int dmar_device_remove(acpi_handle handle)
+{
+	return dmar_device_hotplug(handle, false);
+}
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 02cd26a..1232336 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -195,6 +195,7 @@
 }
 static inline void set_root_value(struct root_entry *root, unsigned long value)
 {
+	root->val &= ~VTD_PAGE_MASK;
 	root->val |= value & VTD_PAGE_MASK;
 }
 
@@ -247,6 +248,7 @@
 static inline void context_set_address_root(struct context_entry *context,
 					    unsigned long value)
 {
+	context->lo &= ~VTD_PAGE_MASK;
 	context->lo |= value & VTD_PAGE_MASK;
 }
 
@@ -328,17 +330,10 @@
 /* si_domain contains multiple devices */
 #define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 1)
 
-/* define the limit of IOMMUs supported in each domain */
-#ifdef	CONFIG_X86
-# define	IOMMU_UNITS_SUPPORTED	MAX_IO_APICS
-#else
-# define	IOMMU_UNITS_SUPPORTED	64
-#endif
-
 struct dmar_domain {
 	int	id;			/* domain id */
 	int	nid;			/* node id */
-	DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
+	DECLARE_BITMAP(iommu_bmp, DMAR_UNITS_SUPPORTED);
 					/* bitmap of iommus this domain uses*/
 
 	struct list_head devices; 	/* all devices' list */
@@ -1132,8 +1127,11 @@
 	unsigned long flags;
 
 	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
-	if (!root)
+	if (!root) {
+		pr_err("IOMMU: allocating root entry for %s failed\n",
+			iommu->name);
 		return -ENOMEM;
+	}
 
 	__iommu_flush_cache(iommu, root, ROOT_SIZE);
 
@@ -1473,7 +1471,7 @@
 	return 0;
 }
 
-static void free_dmar_iommu(struct intel_iommu *iommu)
+static void disable_dmar_iommu(struct intel_iommu *iommu)
 {
 	struct dmar_domain *domain;
 	int i;
@@ -1497,11 +1495,16 @@
 
 	if (iommu->gcmd & DMA_GCMD_TE)
 		iommu_disable_translation(iommu);
+}
 
-	kfree(iommu->domains);
-	kfree(iommu->domain_ids);
-	iommu->domains = NULL;
-	iommu->domain_ids = NULL;
+static void free_dmar_iommu(struct intel_iommu *iommu)
+{
+	if (iommu->domains && iommu->domain_ids) {
+		kfree(iommu->domains);
+		kfree(iommu->domain_ids);
+		iommu->domains = NULL;
+		iommu->domain_ids = NULL;
+	}
 
 	g_iommus[iommu->seq_id] = NULL;
 
@@ -1983,7 +1986,7 @@
 {
 	struct dma_pte *first_pte = NULL, *pte = NULL;
 	phys_addr_t uninitialized_var(pteval);
-	unsigned long sg_res;
+	unsigned long sg_res = 0;
 	unsigned int largepage_lvl = 0;
 	unsigned long lvl_pages = 0;
 
@@ -1994,10 +1997,8 @@
 
 	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
 
-	if (sg)
-		sg_res = 0;
-	else {
-		sg_res = nr_pages + 1;
+	if (!sg) {
+		sg_res = nr_pages;
 		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
 	}
 
@@ -2708,6 +2709,41 @@
 	return 0;
 }
 
+static void intel_iommu_init_qi(struct intel_iommu *iommu)
+{
+	/*
+	 * Start from the sane iommu hardware state.
+	 * If the queued invalidation is already initialized by us
+	 * (for example, while enabling interrupt-remapping) then
+	 * we got things already rolling from a sane state.
+	 */
+	if (!iommu->qi) {
+		/*
+		 * Clear any previous faults.
+		 */
+		dmar_fault(-1, iommu);
+		/*
+		 * Disable queued invalidation if supported and already enabled
+		 * before OS handover.
+		 */
+		dmar_disable_qi(iommu);
+	}
+
+	if (dmar_enable_qi(iommu)) {
+		/*
+		 * Queued Invalidate not enabled, use Register Based Invalidate
+		 */
+		iommu->flush.flush_context = __iommu_flush_context;
+		iommu->flush.flush_iotlb = __iommu_flush_iotlb;
+		pr_info("IOMMU: %s using Register based invalidation\n",
+			iommu->name);
+	} else {
+		iommu->flush.flush_context = qi_flush_context;
+		iommu->flush.flush_iotlb = qi_flush_iotlb;
+		pr_info("IOMMU: %s using Queued invalidation\n", iommu->name);
+	}
+}
+
 static int __init init_dmars(void)
 {
 	struct dmar_drhd_unit *drhd;
@@ -2728,14 +2764,18 @@
 		 * threaded kernel __init code path all other access are read
 		 * only
 		 */
-		if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
+		if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
 			g_num_of_iommus++;
 			continue;
 		}
 		printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
-			  IOMMU_UNITS_SUPPORTED);
+			  DMAR_UNITS_SUPPORTED);
 	}
 
+	/* Preallocate enough resources for IOMMU hot-addition */
+	if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
+		g_num_of_iommus = DMAR_UNITS_SUPPORTED;
+
 	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
 			GFP_KERNEL);
 	if (!g_iommus) {
@@ -2764,58 +2804,14 @@
 		 * among all IOMMU's. Need to Split it later.
 		 */
 		ret = iommu_alloc_root_entry(iommu);
-		if (ret) {
-			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
+		if (ret)
 			goto free_iommu;
-		}
 		if (!ecap_pass_through(iommu->ecap))
 			hw_pass_through = 0;
 	}
 
-	/*
-	 * Start from the sane iommu hardware state.
-	 */
-	for_each_active_iommu(iommu, drhd) {
-		/*
-		 * If the queued invalidation is already initialized by us
-		 * (for example, while enabling interrupt-remapping) then
-		 * we got the things already rolling from a sane state.
-		 */
-		if (iommu->qi)
-			continue;
-
-		/*
-		 * Clear any previous faults.
-		 */
-		dmar_fault(-1, iommu);
-		/*
-		 * Disable queued invalidation if supported and already enabled
-		 * before OS handover.
-		 */
-		dmar_disable_qi(iommu);
-	}
-
-	for_each_active_iommu(iommu, drhd) {
-		if (dmar_enable_qi(iommu)) {
-			/*
-			 * Queued Invalidate not enabled, use Register Based
-			 * Invalidate
-			 */
-			iommu->flush.flush_context = __iommu_flush_context;
-			iommu->flush.flush_iotlb = __iommu_flush_iotlb;
-			printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
-			       "invalidation\n",
-				iommu->seq_id,
-			       (unsigned long long)drhd->reg_base_addr);
-		} else {
-			iommu->flush.flush_context = qi_flush_context;
-			iommu->flush.flush_iotlb = qi_flush_iotlb;
-			printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
-			       "invalidation\n",
-				iommu->seq_id,
-			       (unsigned long long)drhd->reg_base_addr);
-		}
-	}
+	for_each_active_iommu(iommu, drhd)
+		intel_iommu_init_qi(iommu);
 
 	if (iommu_pass_through)
 		iommu_identity_mapping |= IDENTMAP_ALL;
@@ -2901,8 +2897,10 @@
 	return 0;
 
 free_iommu:
-	for_each_active_iommu(iommu, drhd)
+	for_each_active_iommu(iommu, drhd) {
+		disable_dmar_iommu(iommu);
 		free_dmar_iommu(iommu);
+	}
 	kfree(deferred_flush);
 free_g_iommus:
 	kfree(g_iommus);
@@ -3682,7 +3680,7 @@
 #endif	/* CONFIG_PM */
 
 
-int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
+int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
 {
 	struct acpi_dmar_reserved_memory *rmrr;
 	struct dmar_rmrr_unit *rmrru;
@@ -3708,17 +3706,48 @@
 	return 0;
 }
 
-int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
+static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
+{
+	struct dmar_atsr_unit *atsru;
+	struct acpi_dmar_atsr *tmp;
+
+	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
+		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
+		if (atsr->segment != tmp->segment)
+			continue;
+		if (atsr->header.length != tmp->header.length)
+			continue;
+		if (memcmp(atsr, tmp, atsr->header.length) == 0)
+			return atsru;
+	}
+
+	return NULL;
+}
+
+int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
 {
 	struct acpi_dmar_atsr *atsr;
 	struct dmar_atsr_unit *atsru;
 
+	if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
+		return 0;
+
 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
-	atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
+	atsru = dmar_find_atsr(atsr);
+	if (atsru)
+		return 0;
+
+	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
 	if (!atsru)
 		return -ENOMEM;
 
-	atsru->hdr = hdr;
+	/*
+	 * If memory is allocated from slab by ACPI _DSM method, we need to
+	 * copy the memory content because the memory buffer will be freed
+	 * on return.
+	 */
+	atsru->hdr = (void *)(atsru + 1);
+	memcpy(atsru->hdr, hdr, hdr->length);
 	atsru->include_all = atsr->flags & 0x1;
 	if (!atsru->include_all) {
 		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
@@ -3741,6 +3770,138 @@
 	kfree(atsru);
 }
 
+int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
+{
+	struct acpi_dmar_atsr *atsr;
+	struct dmar_atsr_unit *atsru;
+
+	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
+	atsru = dmar_find_atsr(atsr);
+	if (atsru) {
+		list_del_rcu(&atsru->list);
+		synchronize_rcu();
+		intel_iommu_free_atsr(atsru);
+	}
+
+	return 0;
+}
+
+int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
+{
+	int i;
+	struct device *dev;
+	struct acpi_dmar_atsr *atsr;
+	struct dmar_atsr_unit *atsru;
+
+	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
+	atsru = dmar_find_atsr(atsr);
+	if (!atsru)
+		return 0;
+
+	if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
+		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
+					  i, dev)
+			return -EBUSY;
+
+	return 0;
+}
+
+static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
+{
+	int sp, ret = 0;
+	struct intel_iommu *iommu = dmaru->iommu;
+
+	if (g_iommus[iommu->seq_id])
+		return 0;
+
+	if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
+		pr_warn("IOMMU: %s doesn't support hardware pass through.\n",
+			iommu->name);
+		return -ENXIO;
+	}
+	if (!ecap_sc_support(iommu->ecap) &&
+	    domain_update_iommu_snooping(iommu)) {
+		pr_warn("IOMMU: %s doesn't support snooping.\n",
+			iommu->name);
+		return -ENXIO;
+	}
+	sp = domain_update_iommu_superpage(iommu) - 1;
+	if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
+		pr_warn("IOMMU: %s doesn't support large page.\n",
+			iommu->name);
+		return -ENXIO;
+	}
+
+	/*
+	 * Disable translation if already enabled prior to OS handover.
+	 */
+	if (iommu->gcmd & DMA_GCMD_TE)
+		iommu_disable_translation(iommu);
+
+	g_iommus[iommu->seq_id] = iommu;
+	ret = iommu_init_domains(iommu);
+	if (ret == 0)
+		ret = iommu_alloc_root_entry(iommu);
+	if (ret)
+		goto out;
+
+	if (dmaru->ignored) {
+		/*
+		 * we always have to disable PMRs or DMA may fail on this device
+		 */
+		if (force_on)
+			iommu_disable_protect_mem_regions(iommu);
+		return 0;
+	}
+
+	intel_iommu_init_qi(iommu);
+	iommu_flush_write_buffer(iommu);
+	ret = dmar_set_interrupt(iommu);
+	if (ret)
+		goto disable_iommu;
+
+	iommu_set_root_entry(iommu);
+	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
+	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+	iommu_enable_translation(iommu);
+
+	if (si_domain) {
+		ret = iommu_attach_domain(si_domain, iommu);
+		if (ret < 0 || si_domain->id != ret)
+			goto disable_iommu;
+		domain_attach_iommu(si_domain, iommu);
+	}
+
+	iommu_disable_protect_mem_regions(iommu);
+	return 0;
+
+disable_iommu:
+	disable_dmar_iommu(iommu);
+out:
+	free_dmar_iommu(iommu);
+	return ret;
+}
+
+int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
+{
+	int ret = 0;
+	struct intel_iommu *iommu = dmaru->iommu;
+
+	if (!intel_iommu_enabled)
+		return 0;
+	if (iommu == NULL)
+		return -EINVAL;
+
+	if (insert) {
+		ret = intel_iommu_add(dmaru);
+	} else {
+		disable_dmar_iommu(iommu);
+		free_dmar_iommu(iommu);
+	}
+
+	return ret;
+}
+
 static void intel_iommu_free_dmars(void)
 {
 	struct dmar_rmrr_unit *rmrru, *rmrr_n;
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 7c80661..27541d4 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -36,7 +36,6 @@
 
 static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
 static struct hpet_scope ir_hpet[MAX_HPET_TBS];
-static int ir_ioapic_num, ir_hpet_num;
 
 /*
  * Lock ordering:
@@ -206,7 +205,7 @@
 	int i;
 
 	for (i = 0; i < MAX_HPET_TBS; i++)
-		if (ir_hpet[i].id == hpet_id)
+		if (ir_hpet[i].id == hpet_id && ir_hpet[i].iommu)
 			return ir_hpet[i].iommu;
 	return NULL;
 }
@@ -216,7 +215,7 @@
 	int i;
 
 	for (i = 0; i < MAX_IO_APICS; i++)
-		if (ir_ioapic[i].id == apic)
+		if (ir_ioapic[i].id == apic && ir_ioapic[i].iommu)
 			return ir_ioapic[i].iommu;
 	return NULL;
 }
@@ -325,7 +324,7 @@
 
 	down_read(&dmar_global_lock);
 	for (i = 0; i < MAX_IO_APICS; i++) {
-		if (ir_ioapic[i].id == apic) {
+		if (ir_ioapic[i].iommu && ir_ioapic[i].id == apic) {
 			sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;
 			break;
 		}
@@ -352,7 +351,7 @@
 
 	down_read(&dmar_global_lock);
 	for (i = 0; i < MAX_HPET_TBS; i++) {
-		if (ir_hpet[i].id == id) {
+		if (ir_hpet[i].iommu && ir_hpet[i].id == id) {
 			sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn;
 			break;
 		}
@@ -473,17 +472,17 @@
 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
 
-
-static int intel_setup_irq_remapping(struct intel_iommu *iommu, int mode)
+static int intel_setup_irq_remapping(struct intel_iommu *iommu)
 {
 	struct ir_table *ir_table;
 	struct page *pages;
 	unsigned long *bitmap;
 
-	ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
-					     GFP_ATOMIC);
+	if (iommu->ir_table)
+		return 0;
 
-	if (!iommu->ir_table)
+	ir_table = kzalloc(sizeof(struct ir_table), GFP_ATOMIC);
+	if (!ir_table)
 		return -ENOMEM;
 
 	pages = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
@@ -492,24 +491,37 @@
 	if (!pages) {
 		pr_err("IR%d: failed to allocate pages of order %d\n",
 		       iommu->seq_id, INTR_REMAP_PAGE_ORDER);
-		kfree(iommu->ir_table);
-		return -ENOMEM;
+		goto out_free_table;
 	}
 
 	bitmap = kcalloc(BITS_TO_LONGS(INTR_REMAP_TABLE_ENTRIES),
 			 sizeof(long), GFP_ATOMIC);
 	if (bitmap == NULL) {
 		pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id);
-		__free_pages(pages, INTR_REMAP_PAGE_ORDER);
-		kfree(ir_table);
-		return -ENOMEM;
+		goto out_free_pages;
 	}
 
 	ir_table->base = page_address(pages);
 	ir_table->bitmap = bitmap;
-
-	iommu_set_irq_remapping(iommu, mode);
+	iommu->ir_table = ir_table;
 	return 0;
+
+out_free_pages:
+	__free_pages(pages, INTR_REMAP_PAGE_ORDER);
+out_free_table:
+	kfree(ir_table);
+	return -ENOMEM;
+}
+
+static void intel_teardown_irq_remapping(struct intel_iommu *iommu)
+{
+	if (iommu && iommu->ir_table) {
+		free_pages((unsigned long)iommu->ir_table->base,
+			   INTR_REMAP_PAGE_ORDER);
+		kfree(iommu->ir_table->bitmap);
+		kfree(iommu->ir_table);
+		iommu->ir_table = NULL;
+	}
 }
 
 /*
@@ -666,9 +678,10 @@
 		if (!ecap_ir_support(iommu->ecap))
 			continue;
 
-		if (intel_setup_irq_remapping(iommu, eim))
+		if (intel_setup_irq_remapping(iommu))
 			goto error;
 
+		iommu_set_irq_remapping(iommu, eim);
 		setup = 1;
 	}
 
@@ -689,9 +702,11 @@
 	return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE;
 
 error:
-	/*
-	 * handle error condition gracefully here!
-	 */
+	for_each_iommu(iommu, drhd)
+		if (ecap_ir_support(iommu->ecap)) {
+			iommu_disable_irq_remapping(iommu);
+			intel_teardown_irq_remapping(iommu);
+		}
 
 	if (x2apic_present)
 		pr_warn("Failed to enable irq remapping.  You are vulnerable to irq-injection attacks.\n");
@@ -699,12 +714,13 @@
 	return -1;
 }
 
-static void ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope,
-				      struct intel_iommu *iommu)
+static int ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope,
+				   struct intel_iommu *iommu,
+				   struct acpi_dmar_hardware_unit *drhd)
 {
 	struct acpi_dmar_pci_path *path;
 	u8 bus;
-	int count;
+	int count, free = -1;
 
 	bus = scope->bus;
 	path = (struct acpi_dmar_pci_path *)(scope + 1);
@@ -720,19 +736,36 @@
 					   PCI_SECONDARY_BUS);
 		path++;
 	}
-	ir_hpet[ir_hpet_num].bus   = bus;
-	ir_hpet[ir_hpet_num].devfn = PCI_DEVFN(path->device, path->function);
-	ir_hpet[ir_hpet_num].iommu = iommu;
-	ir_hpet[ir_hpet_num].id    = scope->enumeration_id;
-	ir_hpet_num++;
+
+	for (count = 0; count < MAX_HPET_TBS; count++) {
+		if (ir_hpet[count].iommu == iommu &&
+		    ir_hpet[count].id == scope->enumeration_id)
+			return 0;
+		else if (ir_hpet[count].iommu == NULL && free == -1)
+			free = count;
+	}
+	if (free == -1) {
+		pr_warn("Exceeded Max HPET blocks\n");
+		return -ENOSPC;
+	}
+
+	ir_hpet[free].iommu = iommu;
+	ir_hpet[free].id    = scope->enumeration_id;
+	ir_hpet[free].bus   = bus;
+	ir_hpet[free].devfn = PCI_DEVFN(path->device, path->function);
+	pr_info("HPET id %d under DRHD base 0x%Lx\n",
+		scope->enumeration_id, drhd->address);
+
+	return 0;
 }
 
-static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
-				      struct intel_iommu *iommu)
+static int ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
+				     struct intel_iommu *iommu,
+				     struct acpi_dmar_hardware_unit *drhd)
 {
 	struct acpi_dmar_pci_path *path;
 	u8 bus;
-	int count;
+	int count, free = -1;
 
 	bus = scope->bus;
 	path = (struct acpi_dmar_pci_path *)(scope + 1);
@@ -749,54 +782,63 @@
 		path++;
 	}
 
-	ir_ioapic[ir_ioapic_num].bus   = bus;
-	ir_ioapic[ir_ioapic_num].devfn = PCI_DEVFN(path->device, path->function);
-	ir_ioapic[ir_ioapic_num].iommu = iommu;
-	ir_ioapic[ir_ioapic_num].id    = scope->enumeration_id;
-	ir_ioapic_num++;
+	for (count = 0; count < MAX_IO_APICS; count++) {
+		if (ir_ioapic[count].iommu == iommu &&
+		    ir_ioapic[count].id == scope->enumeration_id)
+			return 0;
+		else if (ir_ioapic[count].iommu == NULL && free == -1)
+			free = count;
+	}
+	if (free == -1) {
+		pr_warn("Exceeded Max IO APICS\n");
+		return -ENOSPC;
+	}
+
+	ir_ioapic[free].bus   = bus;
+	ir_ioapic[free].devfn = PCI_DEVFN(path->device, path->function);
+	ir_ioapic[free].iommu = iommu;
+	ir_ioapic[free].id    = scope->enumeration_id;
+	pr_info("IOAPIC id %d under DRHD base  0x%Lx IOMMU %d\n",
+		scope->enumeration_id, drhd->address, iommu->seq_id);
+
+	return 0;
 }
 
 static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header,
 				      struct intel_iommu *iommu)
 {
+	int ret = 0;
 	struct acpi_dmar_hardware_unit *drhd;
 	struct acpi_dmar_device_scope *scope;
 	void *start, *end;
 
 	drhd = (struct acpi_dmar_hardware_unit *)header;
-
 	start = (void *)(drhd + 1);
 	end = ((void *)drhd) + header->length;
 
-	while (start < end) {
+	while (start < end && ret == 0) {
 		scope = start;
-		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
-			if (ir_ioapic_num == MAX_IO_APICS) {
-				printk(KERN_WARNING "Exceeded Max IO APICS\n");
-				return -1;
-			}
-
-			printk(KERN_INFO "IOAPIC id %d under DRHD base "
-			       " 0x%Lx IOMMU %d\n", scope->enumeration_id,
-			       drhd->address, iommu->seq_id);
-
-			ir_parse_one_ioapic_scope(scope, iommu);
-		} else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET) {
-			if (ir_hpet_num == MAX_HPET_TBS) {
-				printk(KERN_WARNING "Exceeded Max HPET blocks\n");
-				return -1;
-			}
-
-			printk(KERN_INFO "HPET id %d under DRHD base"
-			       " 0x%Lx\n", scope->enumeration_id,
-			       drhd->address);
-
-			ir_parse_one_hpet_scope(scope, iommu);
-		}
+		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC)
+			ret = ir_parse_one_ioapic_scope(scope, iommu, drhd);
+		else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET)
+			ret = ir_parse_one_hpet_scope(scope, iommu, drhd);
 		start += scope->length;
 	}
 
-	return 0;
+	return ret;
+}
+
+static void ir_remove_ioapic_hpet_scope(struct intel_iommu *iommu)
+{
+	int i;
+
+	for (i = 0; i < MAX_HPET_TBS; i++)
+		if (ir_hpet[i].iommu == iommu)
+			ir_hpet[i].iommu = NULL;
+
+	for (i = 0; i < MAX_IO_APICS; i++)
+		if (ir_ioapic[i].iommu == iommu)
+			ir_ioapic[i].iommu = NULL;
 }
 
 /*
@@ -1171,3 +1213,86 @@
 	.msi_setup_irq		= intel_msi_setup_irq,
 	.alloc_hpet_msi		= intel_alloc_hpet_msi,
 };
+
+/*
+ * Support of Interrupt Remapping Unit Hotplug
+ */
+static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu)
+{
+	int ret;
+	int eim = x2apic_enabled();
+
+	if (eim && !ecap_eim_support(iommu->ecap)) {
+		pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n",
+			iommu->reg_phys, iommu->ecap);
+		return -ENODEV;
+	}
+
+	if (ir_parse_ioapic_hpet_scope(dmaru->hdr, iommu)) {
+		pr_warn("DRHD %Lx: failed to parse managed IOAPIC/HPET\n",
+			iommu->reg_phys);
+		return -ENODEV;
+	}
+
+	/* TODO: check all IOAPICs are covered by IOMMU */
+
+	/* Setup Interrupt-remapping now. */
+	ret = intel_setup_irq_remapping(iommu);
+	if (ret) {
+		pr_err("DRHD %Lx: failed to allocate resource\n",
+		       iommu->reg_phys);
+		ir_remove_ioapic_hpet_scope(iommu);
+		return ret;
+	}
+
+	if (!iommu->qi) {
+		/* Clear previous faults. */
+		dmar_fault(-1, iommu);
+		iommu_disable_irq_remapping(iommu);
+		dmar_disable_qi(iommu);
+	}
+
+	/* Enable queued invalidation */
+	ret = dmar_enable_qi(iommu);
+	if (!ret) {
+		iommu_set_irq_remapping(iommu, eim);
+	} else {
+		pr_err("DRHD %Lx: failed to enable queued invalidation, ecap %Lx, ret %d\n",
+		       iommu->reg_phys, iommu->ecap, ret);
+		intel_teardown_irq_remapping(iommu);
+		ir_remove_ioapic_hpet_scope(iommu);
+	}
+
+	return ret;
+}
+
+int dmar_ir_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
+{
+	int ret = 0;
+	struct intel_iommu *iommu = dmaru->iommu;
+
+	if (!irq_remapping_enabled)
+		return 0;
+	if (iommu == NULL)
+		return -EINVAL;
+	if (!ecap_ir_support(iommu->ecap))
+		return 0;
+
+	if (insert) {
+		if (!iommu->ir_table)
+			ret = dmar_ir_add(dmaru, iommu);
+	} else {
+		if (iommu->ir_table) {
+			if (!bitmap_empty(iommu->ir_table->bitmap,
+					  INTR_REMAP_TABLE_ENTRIES)) {
+				ret = -EBUSY;
+			} else {
+				iommu_disable_irq_remapping(iommu);
+				intel_teardown_irq_remapping(iommu);
+				ir_remove_ioapic_hpet_scope(iommu);
+			}
+		}
+	}
+
+	return ret;
+}
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 02e4313..1bd6335 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1143,14 +1143,24 @@
 {
 	struct scatterlist *s;
 	size_t mapped = 0;
-	unsigned int i;
+	unsigned int i, min_pagesz;
 	int ret;
 
-	for_each_sg(sg, s, nents, i) {
-		phys_addr_t phys = page_to_phys(sg_page(s));
+	if (unlikely(domain->ops->pgsize_bitmap == 0UL))
+		return 0;
 
-		/* We are mapping on page boundarys, so offset must be 0 */
-		if (s->offset)
+	min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+
+	for_each_sg(sg, s, nents, i) {
+		phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset;
+
+		/*
+		 * We are mapping on IOMMU page boundaries, so offset within
+		 * the page must be 0. However, the IOMMU may support pages
+		 * smaller than PAGE_SIZE, so s->offset may still be non-zero
+		 * as long as it falls on such a boundary within the CPU page.
+		 */
+		if (!IS_ALIGNED(s->offset, min_pagesz))
 			goto out_err;
 
 		ret = iommu_map(domain, iova + mapped, phys, s->length, prot);
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index e509c58..99effbb 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1185,7 +1185,7 @@
 			       dev_name(&pdev->dev), mmu);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "failed to request IRQ %d\n", irq);
-		return irq;
+		return ret;
 	}
 
 	ipmmu_device_reset(mmu);
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 1c7b78e..e1b0537 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -73,8 +73,7 @@
 
 static void __disable_clocks(struct msm_iommu_drvdata *drvdata)
 {
-	if (drvdata->clk)
-		clk_disable(drvdata->clk);
+	clk_disable(drvdata->clk);
 	clk_disable(drvdata->pclk);
 }
 
diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c
index 61def7cb..b6d01f9 100644
--- a/drivers/iommu/msm_iommu_dev.c
+++ b/drivers/iommu/msm_iommu_dev.c
@@ -131,7 +131,7 @@
 	struct clk *iommu_clk;
 	struct clk *iommu_pclk;
 	struct msm_iommu_drvdata *drvdata;
-	struct msm_iommu_dev *iommu_dev = pdev->dev.platform_data;
+	struct msm_iommu_dev *iommu_dev = dev_get_platdata(&pdev->dev);
 	void __iomem *regs_base;
 	int ret, irq, par;
 
@@ -224,8 +224,7 @@
 
 	platform_set_drvdata(pdev, drvdata);
 
-	if (iommu_clk)
-		clk_disable(iommu_clk);
+	clk_disable(iommu_clk);
 
 	clk_disable(iommu_pclk);
 
@@ -264,7 +263,7 @@
 
 static int msm_iommu_ctx_probe(struct platform_device *pdev)
 {
-	struct msm_iommu_ctx_dev *c = pdev->dev.platform_data;
+	struct msm_iommu_ctx_dev *c = dev_get_platdata(&pdev->dev);
 	struct msm_iommu_drvdata *drvdata;
 	struct msm_iommu_ctx_drvdata *ctx_drvdata;
 	int i, ret;
@@ -323,8 +322,7 @@
 		SET_NSCFG(drvdata->base, mid, 3);
 	}
 
-	if (drvdata->clk)
-		clk_disable(drvdata->clk);
+	clk_disable(drvdata->clk);
 	clk_disable(drvdata->pclk);
 
 	dev_info(&pdev->dev, "context %s using bank %d\n", c->name, c->num);
diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c
index 531658d..f3d20a2 100644
--- a/drivers/iommu/omap-iommu-debug.c
+++ b/drivers/iommu/omap-iommu-debug.c
@@ -10,45 +10,35 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/module.h>
 #include <linux/err.h>
-#include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
-#include <linux/platform_device.h>
 #include <linux/debugfs.h>
-#include <linux/omap-iommu.h>
 #include <linux/platform_data/iommu-omap.h>
 
 #include "omap-iopgtable.h"
 #include "omap-iommu.h"
 
-#define MAXCOLUMN 100 /* for short messages */
-
 static DEFINE_MUTEX(iommu_debug_lock);
 
 static struct dentry *iommu_debug_root;
 
-static ssize_t debug_read_ver(struct file *file, char __user *userbuf,
-			      size_t count, loff_t *ppos)
+static inline bool is_omap_iommu_detached(struct omap_iommu *obj)
 {
-	u32 ver = omap_iommu_arch_version();
-	char buf[MAXCOLUMN], *p = buf;
-
-	p += sprintf(p, "H/W version: %d.%d\n", (ver >> 4) & 0xf , ver & 0xf);
-
-	return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+	return !obj->domain;
 }
 
 static ssize_t debug_read_regs(struct file *file, char __user *userbuf,
 			       size_t count, loff_t *ppos)
 {
-	struct device *dev = file->private_data;
-	struct omap_iommu *obj = dev_to_omap_iommu(dev);
+	struct omap_iommu *obj = file->private_data;
 	char *p, *buf;
 	ssize_t bytes;
 
+	if (is_omap_iommu_detached(obj))
+		return -EPERM;
+
 	buf = kmalloc(count, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
@@ -68,11 +58,13 @@
 static ssize_t debug_read_tlb(struct file *file, char __user *userbuf,
 			      size_t count, loff_t *ppos)
 {
-	struct device *dev = file->private_data;
-	struct omap_iommu *obj = dev_to_omap_iommu(dev);
+	struct omap_iommu *obj = file->private_data;
 	char *p, *buf;
 	ssize_t bytes, rest;
 
+	if (is_omap_iommu_detached(obj))
+		return -EPERM;
+
 	buf = kmalloc(count, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
@@ -93,133 +85,69 @@
 	return bytes;
 }
 
-static ssize_t debug_write_pagetable(struct file *file,
-		     const char __user *userbuf, size_t count, loff_t *ppos)
+static void dump_ioptable(struct seq_file *s)
 {
-	struct iotlb_entry e;
-	struct cr_regs cr;
-	int err;
-	struct device *dev = file->private_data;
-	struct omap_iommu *obj = dev_to_omap_iommu(dev);
-	char buf[MAXCOLUMN], *p = buf;
-
-	count = min(count, sizeof(buf));
-
-	mutex_lock(&iommu_debug_lock);
-	if (copy_from_user(p, userbuf, count)) {
-		mutex_unlock(&iommu_debug_lock);
-		return -EFAULT;
-	}
-
-	sscanf(p, "%x %x", &cr.cam, &cr.ram);
-	if (!cr.cam || !cr.ram) {
-		mutex_unlock(&iommu_debug_lock);
-		return -EINVAL;
-	}
-
-	omap_iotlb_cr_to_e(&cr, &e);
-	err = omap_iopgtable_store_entry(obj, &e);
-	if (err)
-		dev_err(obj->dev, "%s: fail to store cr\n", __func__);
-
-	mutex_unlock(&iommu_debug_lock);
-	return count;
-}
-
-#define dump_ioptable_entry_one(lv, da, val)			\
-	({							\
-		int __err = 0;					\
-		ssize_t bytes;					\
-		const int maxcol = 22;				\
-		const char *str = "%d: %08x %08x\n";		\
-		bytes = snprintf(p, maxcol, str, lv, da, val);	\
-		p += bytes;					\
-		len -= bytes;					\
-		if (len < maxcol)				\
-			__err = -ENOMEM;			\
-		__err;						\
-	})
-
-static ssize_t dump_ioptable(struct omap_iommu *obj, char *buf, ssize_t len)
-{
-	int i;
-	u32 *iopgd;
-	char *p = buf;
+	int i, j;
+	u32 da;
+	u32 *iopgd, *iopte;
+	struct omap_iommu *obj = s->private;
 
 	spin_lock(&obj->page_table_lock);
 
 	iopgd = iopgd_offset(obj, 0);
 	for (i = 0; i < PTRS_PER_IOPGD; i++, iopgd++) {
-		int j, err;
-		u32 *iopte;
-		u32 da;
-
 		if (!*iopgd)
 			continue;
 
 		if (!(*iopgd & IOPGD_TABLE)) {
 			da = i << IOPGD_SHIFT;
-
-			err = dump_ioptable_entry_one(1, da, *iopgd);
-			if (err)
-				goto out;
+			seq_printf(s, "1: 0x%08x 0x%08x\n", da, *iopgd);
 			continue;
 		}
 
 		iopte = iopte_offset(iopgd, 0);
-
 		for (j = 0; j < PTRS_PER_IOPTE; j++, iopte++) {
 			if (!*iopte)
 				continue;
 
 			da = (i << IOPGD_SHIFT) + (j << IOPTE_SHIFT);
-			err = dump_ioptable_entry_one(2, da, *iopgd);
-			if (err)
-				goto out;
+			seq_printf(s, "2: 0x%08x 0x%08x\n", da, *iopte);
 		}
 	}
-out:
-	spin_unlock(&obj->page_table_lock);
 
-	return p - buf;
+	spin_unlock(&obj->page_table_lock);
 }
 
-static ssize_t debug_read_pagetable(struct file *file, char __user *userbuf,
-				    size_t count, loff_t *ppos)
+static int debug_read_pagetable(struct seq_file *s, void *data)
 {
-	struct device *dev = file->private_data;
-	struct omap_iommu *obj = dev_to_omap_iommu(dev);
-	char *p, *buf;
-	size_t bytes;
+	struct omap_iommu *obj = s->private;
 
-	buf = (char *)__get_free_page(GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-	p = buf;
-
-	p += sprintf(p, "L: %8s %8s\n", "da:", "pa:");
-	p += sprintf(p, "-----------------------------------------\n");
+	if (is_omap_iommu_detached(obj))
+		return -EPERM;
 
 	mutex_lock(&iommu_debug_lock);
 
-	bytes = PAGE_SIZE - (p - buf);
-	p += dump_ioptable(obj, p, bytes);
-
-	bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+	seq_printf(s, "L: %8s %8s\n", "da:", "pte:");
+	seq_puts(s, "--------------------------\n");
+	dump_ioptable(s);
 
 	mutex_unlock(&iommu_debug_lock);
-	free_page((unsigned long)buf);
 
-	return bytes;
+	return 0;
 }
 
-#define DEBUG_FOPS(name)						\
-	static const struct file_operations debug_##name##_fops = {	\
-		.open = simple_open,					\
-		.read = debug_read_##name,				\
-		.write = debug_write_##name,				\
-		.llseek = generic_file_llseek,				\
-	};
+#define DEBUG_SEQ_FOPS_RO(name)						       \
+	static int debug_open_##name(struct inode *inode, struct file *file)   \
+	{								       \
+		return single_open(file, debug_read_##name, inode->i_private); \
+	}								       \
+									       \
+	static const struct file_operations debug_##name##_fops = {	       \
+		.open		= debug_open_##name,			       \
+		.read		= seq_read,				       \
+		.llseek		= seq_lseek,				       \
+		.release	= single_release,			       \
+	}
 
 #define DEBUG_FOPS_RO(name)						\
 	static const struct file_operations debug_##name##_fops = {	\
@@ -228,103 +156,63 @@
 		.llseek = generic_file_llseek,				\
 	};
 
-DEBUG_FOPS_RO(ver);
 DEBUG_FOPS_RO(regs);
 DEBUG_FOPS_RO(tlb);
-DEBUG_FOPS(pagetable);
+DEBUG_SEQ_FOPS_RO(pagetable);
 
 #define __DEBUG_ADD_FILE(attr, mode)					\
 	{								\
 		struct dentry *dent;					\
-		dent = debugfs_create_file(#attr, mode, parent,		\
-					   dev, &debug_##attr##_fops);	\
+		dent = debugfs_create_file(#attr, mode, obj->debug_dir,	\
+					   obj, &debug_##attr##_fops);	\
 		if (!dent)						\
-			return -ENOMEM;					\
+			goto err;					\
 	}
 
-#define DEBUG_ADD_FILE(name) __DEBUG_ADD_FILE(name, 0600)
 #define DEBUG_ADD_FILE_RO(name) __DEBUG_ADD_FILE(name, 0400)
 
-static int iommu_debug_register(struct device *dev, void *data)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct omap_iommu *obj = platform_get_drvdata(pdev);
-	struct omap_iommu_arch_data *arch_data;
-	struct dentry *d, *parent;
-
-	if (!obj || !obj->dev)
-		return -EINVAL;
-
-	arch_data = kzalloc(sizeof(*arch_data), GFP_KERNEL);
-	if (!arch_data)
-		return -ENOMEM;
-
-	arch_data->iommu_dev = obj;
-
-	dev->archdata.iommu = arch_data;
-
-	d = debugfs_create_dir(obj->name, iommu_debug_root);
-	if (!d)
-		goto nomem;
-	parent = d;
-
-	d = debugfs_create_u8("nr_tlb_entries", 400, parent,
-			      (u8 *)&obj->nr_tlb_entries);
-	if (!d)
-		goto nomem;
-
-	DEBUG_ADD_FILE_RO(ver);
-	DEBUG_ADD_FILE_RO(regs);
-	DEBUG_ADD_FILE_RO(tlb);
-	DEBUG_ADD_FILE(pagetable);
-
-	return 0;
-
-nomem:
-	kfree(arch_data);
-	return -ENOMEM;
-}
-
-static int iommu_debug_unregister(struct device *dev, void *data)
-{
-	if (!dev->archdata.iommu)
-		return 0;
-
-	kfree(dev->archdata.iommu);
-
-	dev->archdata.iommu = NULL;
-
-	return 0;
-}
-
-static int __init iommu_debug_init(void)
+void omap_iommu_debugfs_add(struct omap_iommu *obj)
 {
 	struct dentry *d;
-	int err;
 
-	d = debugfs_create_dir("iommu", NULL);
+	if (!iommu_debug_root)
+		return;
+
+	obj->debug_dir = debugfs_create_dir(obj->name, iommu_debug_root);
+	if (!obj->debug_dir)
+		return;
+
+	d = debugfs_create_u8("nr_tlb_entries", 0400, obj->debug_dir,
+			      (u8 *)&obj->nr_tlb_entries);
 	if (!d)
-		return -ENOMEM;
-	iommu_debug_root = d;
+		return;
 
-	err = omap_foreach_iommu_device(d, iommu_debug_register);
-	if (err)
-		goto err_out;
-	return 0;
+	DEBUG_ADD_FILE_RO(regs);
+	DEBUG_ADD_FILE_RO(tlb);
+	DEBUG_ADD_FILE_RO(pagetable);
 
-err_out:
-	debugfs_remove_recursive(iommu_debug_root);
-	return err;
+	return;
+
+err:
+	debugfs_remove_recursive(obj->debug_dir);
 }
-module_init(iommu_debug_init)
 
-static void __exit iommu_debugfs_exit(void)
+void omap_iommu_debugfs_remove(struct omap_iommu *obj)
 {
-	debugfs_remove_recursive(iommu_debug_root);
-	omap_foreach_iommu_device(NULL, iommu_debug_unregister);
-}
-module_exit(iommu_debugfs_exit)
+	if (!obj->debug_dir)
+		return;
 
-MODULE_DESCRIPTION("omap iommu: debugfs interface");
-MODULE_AUTHOR("Hiroshi DOYU <Hiroshi.DOYU@nokia.com>");
-MODULE_LICENSE("GPL v2");
+	debugfs_remove_recursive(obj->debug_dir);
+}
+
+void __init omap_iommu_debugfs_init(void)
+{
+	iommu_debug_root = debugfs_create_dir("omap_iommu", NULL);
+	if (!iommu_debug_root)
+		pr_err("can't create debugfs dir\n");
+}
+
+void __exit omap_iommu_debugfs_exit(void)
+{
+	debugfs_remove(iommu_debug_root);
+}
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 18003c04..bbb7dce 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -76,53 +76,23 @@
 	short vict;
 };
 
-/* accommodate the difference between omap1 and omap2/3 */
-static const struct iommu_functions *arch_iommu;
-
 static struct platform_driver omap_iommu_driver;
 static struct kmem_cache *iopte_cachep;
 
 /**
- * omap_install_iommu_arch - Install archtecure specific iommu functions
- * @ops:	a pointer to architecture specific iommu functions
- *
- * There are several kind of iommu algorithm(tlb, pagetable) among
- * omap series. This interface installs such an iommu algorighm.
- **/
-int omap_install_iommu_arch(const struct iommu_functions *ops)
-{
-	if (arch_iommu)
-		return -EBUSY;
-
-	arch_iommu = ops;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(omap_install_iommu_arch);
-
-/**
- * omap_uninstall_iommu_arch - Uninstall archtecure specific iommu functions
- * @ops:	a pointer to architecture specific iommu functions
- *
- * This interface uninstalls the iommu algorighm installed previously.
- **/
-void omap_uninstall_iommu_arch(const struct iommu_functions *ops)
-{
-	if (arch_iommu != ops)
-		pr_err("%s: not your arch\n", __func__);
-
-	arch_iommu = NULL;
-}
-EXPORT_SYMBOL_GPL(omap_uninstall_iommu_arch);
-
-/**
  * omap_iommu_save_ctx - Save registers for pm off-mode support
  * @dev:	client device
  **/
 void omap_iommu_save_ctx(struct device *dev)
 {
 	struct omap_iommu *obj = dev_to_omap_iommu(dev);
+	u32 *p = obj->ctx;
+	int i;
 
-	arch_iommu->save_ctx(obj);
+	for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
+		p[i] = iommu_read_reg(obj, i * sizeof(u32));
+		dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]);
+	}
 }
 EXPORT_SYMBOL_GPL(omap_iommu_save_ctx);
 
@@ -133,28 +103,74 @@
 void omap_iommu_restore_ctx(struct device *dev)
 {
 	struct omap_iommu *obj = dev_to_omap_iommu(dev);
+	u32 *p = obj->ctx;
+	int i;
 
-	arch_iommu->restore_ctx(obj);
+	for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
+		iommu_write_reg(obj, p[i], i * sizeof(u32));
+		dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]);
+	}
 }
 EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx);
 
-/**
- * omap_iommu_arch_version - Return running iommu arch version
- **/
-u32 omap_iommu_arch_version(void)
+static void __iommu_set_twl(struct omap_iommu *obj, bool on)
 {
-	return arch_iommu->version;
+	u32 l = iommu_read_reg(obj, MMU_CNTL);
+
+	if (on)
+		iommu_write_reg(obj, MMU_IRQ_TWL_MASK, MMU_IRQENABLE);
+	else
+		iommu_write_reg(obj, MMU_IRQ_TLB_MISS_MASK, MMU_IRQENABLE);
+
+	l &= ~MMU_CNTL_MASK;
+	if (on)
+		l |= (MMU_CNTL_MMU_EN | MMU_CNTL_TWL_EN);
+	else
+		l |= (MMU_CNTL_MMU_EN);
+
+	iommu_write_reg(obj, l, MMU_CNTL);
 }
-EXPORT_SYMBOL_GPL(omap_iommu_arch_version);
+
+static int omap2_iommu_enable(struct omap_iommu *obj)
+{
+	u32 l, pa;
+
+	if (!obj->iopgd || !IS_ALIGNED((u32)obj->iopgd,  SZ_16K))
+		return -EINVAL;
+
+	pa = virt_to_phys(obj->iopgd);
+	if (!IS_ALIGNED(pa, SZ_16K))
+		return -EINVAL;
+
+	l = iommu_read_reg(obj, MMU_REVISION);
+	dev_info(obj->dev, "%s: version %d.%d\n", obj->name,
+		 (l >> 4) & 0xf, l & 0xf);
+
+	iommu_write_reg(obj, pa, MMU_TTB);
+
+	if (obj->has_bus_err_back)
+		iommu_write_reg(obj, MMU_GP_REG_BUS_ERR_BACK_EN, MMU_GP_REG);
+
+	__iommu_set_twl(obj, true);
+
+	return 0;
+}
+
+static void omap2_iommu_disable(struct omap_iommu *obj)
+{
+	u32 l = iommu_read_reg(obj, MMU_CNTL);
+
+	l &= ~MMU_CNTL_MASK;
+	iommu_write_reg(obj, l, MMU_CNTL);
+
+	dev_dbg(obj->dev, "%s is shutting down\n", obj->name);
+}
 
 static int iommu_enable(struct omap_iommu *obj)
 {
 	int err;
 	struct platform_device *pdev = to_platform_device(obj->dev);
-	struct iommu_platform_data *pdata = pdev->dev.platform_data;
-
-	if (!arch_iommu)
-		return -ENODEV;
+	struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev);
 
 	if (pdata && pdata->deassert_reset) {
 		err = pdata->deassert_reset(pdev, pdata->reset_name);
@@ -166,7 +182,7 @@
 
 	pm_runtime_get_sync(obj->dev);
 
-	err = arch_iommu->enable(obj);
+	err = omap2_iommu_enable(obj);
 
 	return err;
 }
@@ -174,9 +190,9 @@
 static void iommu_disable(struct omap_iommu *obj)
 {
 	struct platform_device *pdev = to_platform_device(obj->dev);
-	struct iommu_platform_data *pdata = pdev->dev.platform_data;
+	struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev);
 
-	arch_iommu->disable(obj);
+	omap2_iommu_disable(obj);
 
 	pm_runtime_put_sync(obj->dev);
 
@@ -187,44 +203,51 @@
 /*
  *	TLB operations
  */
-void omap_iotlb_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e)
-{
-	BUG_ON(!cr || !e);
-
-	arch_iommu->cr_to_e(cr, e);
-}
-EXPORT_SYMBOL_GPL(omap_iotlb_cr_to_e);
-
 static inline int iotlb_cr_valid(struct cr_regs *cr)
 {
 	if (!cr)
 		return -EINVAL;
 
-	return arch_iommu->cr_valid(cr);
-}
-
-static inline struct cr_regs *iotlb_alloc_cr(struct omap_iommu *obj,
-					     struct iotlb_entry *e)
-{
-	if (!e)
-		return NULL;
-
-	return arch_iommu->alloc_cr(obj, e);
+	return cr->cam & MMU_CAM_V;
 }
 
 static u32 iotlb_cr_to_virt(struct cr_regs *cr)
 {
-	return arch_iommu->cr_to_virt(cr);
+	u32 page_size = cr->cam & MMU_CAM_PGSZ_MASK;
+	u32 mask = get_cam_va_mask(cr->cam & page_size);
+
+	return cr->cam & mask;
 }
 
 static u32 get_iopte_attr(struct iotlb_entry *e)
 {
-	return arch_iommu->get_pte_attr(e);
+	u32 attr;
+
+	attr = e->mixed << 5;
+	attr |= e->endian;
+	attr |= e->elsz >> 3;
+	attr <<= (((e->pgsz == MMU_CAM_PGSZ_4K) ||
+			(e->pgsz == MMU_CAM_PGSZ_64K)) ? 0 : 6);
+	return attr;
 }
 
 static u32 iommu_report_fault(struct omap_iommu *obj, u32 *da)
 {
-	return arch_iommu->fault_isr(obj, da);
+	u32 status, fault_addr;
+
+	status = iommu_read_reg(obj, MMU_IRQSTATUS);
+	status &= MMU_IRQ_MASK;
+	if (!status) {
+		*da = 0;
+		return 0;
+	}
+
+	fault_addr = iommu_read_reg(obj, MMU_FAULT_AD);
+	*da = fault_addr;
+
+	iommu_write_reg(obj, status, MMU_IRQSTATUS);
+
+	return status;
 }
 
 static void iotlb_lock_get(struct omap_iommu *obj, struct iotlb_lock *l)
@@ -250,31 +273,19 @@
 
 static void iotlb_read_cr(struct omap_iommu *obj, struct cr_regs *cr)
 {
-	arch_iommu->tlb_read_cr(obj, cr);
+	cr->cam = iommu_read_reg(obj, MMU_READ_CAM);
+	cr->ram = iommu_read_reg(obj, MMU_READ_RAM);
 }
 
 static void iotlb_load_cr(struct omap_iommu *obj, struct cr_regs *cr)
 {
-	arch_iommu->tlb_load_cr(obj, cr);
+	iommu_write_reg(obj, cr->cam | MMU_CAM_V, MMU_CAM);
+	iommu_write_reg(obj, cr->ram, MMU_RAM);
 
 	iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY);
 	iommu_write_reg(obj, 1, MMU_LD_TLB);
 }
 
-/**
- * iotlb_dump_cr - Dump an iommu tlb entry into buf
- * @obj:	target iommu
- * @cr:		contents of cam and ram register
- * @buf:	output buffer
- **/
-static inline ssize_t iotlb_dump_cr(struct omap_iommu *obj, struct cr_regs *cr,
-				    char *buf)
-{
-	BUG_ON(!cr || !buf);
-
-	return arch_iommu->dump_cr(obj, cr, buf);
-}
-
 /* only used in iotlb iteration for-loop */
 static struct cr_regs __iotlb_read_cr(struct omap_iommu *obj, int n)
 {
@@ -289,12 +300,36 @@
 	return cr;
 }
 
+#ifdef PREFETCH_IOTLB
+static struct cr_regs *iotlb_alloc_cr(struct omap_iommu *obj,
+				      struct iotlb_entry *e)
+{
+	struct cr_regs *cr;
+
+	if (!e)
+		return NULL;
+
+	if (e->da & ~(get_cam_va_mask(e->pgsz))) {
+		dev_err(obj->dev, "%s:\twrong alignment: %08x\n", __func__,
+			e->da);
+		return ERR_PTR(-EINVAL);
+	}
+
+	cr = kmalloc(sizeof(*cr), GFP_KERNEL);
+	if (!cr)
+		return ERR_PTR(-ENOMEM);
+
+	cr->cam = (e->da & MMU_CAM_VATAG_MASK) | e->prsvd | e->pgsz | e->valid;
+	cr->ram = e->pa | e->endian | e->elsz | e->mixed;
+
+	return cr;
+}
+
 /**
  * load_iotlb_entry - Set an iommu tlb entry
  * @obj:	target iommu
  * @e:		an iommu tlb entry info
  **/
-#ifdef PREFETCH_IOTLB
 static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e)
 {
 	int err = 0;
@@ -423,7 +458,45 @@
 	pm_runtime_put_sync(obj->dev);
 }
 
-#if defined(CONFIG_OMAP_IOMMU_DEBUG) || defined(CONFIG_OMAP_IOMMU_DEBUG_MODULE)
+#ifdef CONFIG_OMAP_IOMMU_DEBUG
+
+#define pr_reg(name)							\
+	do {								\
+		ssize_t bytes;						\
+		const char *str = "%20s: %08x\n";			\
+		const int maxcol = 32;					\
+		bytes = snprintf(p, maxcol, str, __stringify(name),	\
+				 iommu_read_reg(obj, MMU_##name));	\
+		p += bytes;						\
+		len -= bytes;						\
+		if (len < maxcol)					\
+			goto out;					\
+	} while (0)
+
+static ssize_t
+omap2_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len)
+{
+	char *p = buf;
+
+	pr_reg(REVISION);
+	pr_reg(IRQSTATUS);
+	pr_reg(IRQENABLE);
+	pr_reg(WALKING_ST);
+	pr_reg(CNTL);
+	pr_reg(FAULT_AD);
+	pr_reg(TTB);
+	pr_reg(LOCK);
+	pr_reg(LD_TLB);
+	pr_reg(CAM);
+	pr_reg(RAM);
+	pr_reg(GFLUSH);
+	pr_reg(FLUSH_ENTRY);
+	pr_reg(READ_CAM);
+	pr_reg(READ_RAM);
+	pr_reg(EMU_FAULT_AD);
+out:
+	return p - buf;
+}
 
 ssize_t omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t bytes)
 {
@@ -432,13 +505,12 @@
 
 	pm_runtime_get_sync(obj->dev);
 
-	bytes = arch_iommu->dump_ctx(obj, buf, bytes);
+	bytes = omap2_iommu_dump_ctx(obj, buf, bytes);
 
 	pm_runtime_put_sync(obj->dev);
 
 	return bytes;
 }
-EXPORT_SYMBOL_GPL(omap_iommu_dump_ctx);
 
 static int
 __dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num)
@@ -464,6 +536,24 @@
 }
 
 /**
+ * iotlb_dump_cr - Dump an iommu tlb entry into buf
+ * @obj:	target iommu
+ * @cr:		contents of cam and ram register
+ * @buf:	output buffer
+ **/
+static ssize_t iotlb_dump_cr(struct omap_iommu *obj, struct cr_regs *cr,
+			     char *buf)
+{
+	char *p = buf;
+
+	/* FIXME: Need more detail analysis of cam/ram */
+	p += sprintf(p, "%08x %08x %01x\n", cr->cam, cr->ram,
+					(cr->cam & MMU_CAM_P) ? 1 : 0);
+
+	return p - buf;
+}
+
+/**
  * omap_dump_tlb_entries - dump cr arrays to given buffer
  * @obj:	target iommu
  * @buf:	output buffer
@@ -488,16 +578,8 @@
 
 	return p - buf;
 }
-EXPORT_SYMBOL_GPL(omap_dump_tlb_entries);
 
-int omap_foreach_iommu_device(void *data, int (*fn)(struct device *, void *))
-{
-	return driver_for_each_device(&omap_iommu_driver.driver,
-				      NULL, data, fn);
-}
-EXPORT_SYMBOL_GPL(omap_foreach_iommu_device);
-
-#endif /* CONFIG_OMAP_IOMMU_DEBUG_MODULE */
+#endif /* CONFIG_OMAP_IOMMU_DEBUG */
 
 /*
  *	H/W pagetable operations
@@ -680,7 +762,8 @@
  * @obj:	target iommu
  * @e:		an iommu tlb entry info
  **/
-int omap_iopgtable_store_entry(struct omap_iommu *obj, struct iotlb_entry *e)
+static int
+omap_iopgtable_store_entry(struct omap_iommu *obj, struct iotlb_entry *e)
 {
 	int err;
 
@@ -690,7 +773,6 @@
 		prefetch_iotlb_entry(obj, e);
 	return err;
 }
-EXPORT_SYMBOL_GPL(omap_iopgtable_store_entry);
 
 /**
  * iopgtable_lookup_entry - Lookup an iommu pte entry
@@ -819,8 +901,9 @@
 	u32 *iopgd, *iopte;
 	struct omap_iommu *obj = data;
 	struct iommu_domain *domain = obj->domain;
+	struct omap_iommu_domain *omap_domain = domain->priv;
 
-	if (!obj->refcount)
+	if (!omap_domain->iommu_dev)
 		return IRQ_NONE;
 
 	errs = iommu_report_fault(obj, &da);
@@ -880,13 +963,6 @@
 
 	spin_lock(&obj->iommu_lock);
 
-	/* an iommu device can only be attached once */
-	if (++obj->refcount > 1) {
-		dev_err(dev, "%s: already attached!\n", obj->name);
-		err = -EBUSY;
-		goto err_enable;
-	}
-
 	obj->iopgd = iopgd;
 	err = iommu_enable(obj);
 	if (err)
@@ -899,7 +975,6 @@
 	return obj;
 
 err_enable:
-	obj->refcount--;
 	spin_unlock(&obj->iommu_lock);
 	return ERR_PTR(err);
 }
@@ -915,9 +990,7 @@
 
 	spin_lock(&obj->iommu_lock);
 
-	if (--obj->refcount == 0)
-		iommu_disable(obj);
-
+	iommu_disable(obj);
 	obj->iopgd = NULL;
 
 	spin_unlock(&obj->iommu_lock);
@@ -934,7 +1007,7 @@
 	int irq;
 	struct omap_iommu *obj;
 	struct resource *res;
-	struct iommu_platform_data *pdata = pdev->dev.platform_data;
+	struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct device_node *of = pdev->dev.of_node;
 
 	obj = devm_kzalloc(&pdev->dev, sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL);
@@ -981,6 +1054,8 @@
 	pm_runtime_irq_safe(obj->dev);
 	pm_runtime_enable(obj->dev);
 
+	omap_iommu_debugfs_add(obj);
+
 	dev_info(&pdev->dev, "%s registered\n", obj->name);
 	return 0;
 }
@@ -990,6 +1065,7 @@
 	struct omap_iommu *obj = platform_get_drvdata(pdev);
 
 	iopgtable_clear_entry_all(obj);
+	omap_iommu_debugfs_remove(obj);
 
 	pm_runtime_disable(obj->dev);
 
@@ -1026,7 +1102,6 @@
 	e->da		= da;
 	e->pa		= pa;
 	e->valid	= MMU_CAM_V;
-	/* FIXME: add OMAP1 support */
 	e->pgsz		= pgsz;
 	e->endian	= MMU_RAM_ENDIAN_LITTLE;
 	e->elsz		= MMU_RAM_ELSZ_8;
@@ -1131,6 +1206,7 @@
 
 	omap_domain->iommu_dev = arch_data->iommu_dev = NULL;
 	omap_domain->dev = NULL;
+	oiommu->domain = NULL;
 }
 
 static void omap_iommu_detach_dev(struct iommu_domain *domain,
@@ -1309,6 +1385,8 @@
 
 	bus_set_iommu(&platform_bus_type, &omap_iommu_ops);
 
+	omap_iommu_debugfs_init();
+
 	return platform_driver_register(&omap_iommu_driver);
 }
 /* must be ready before omap3isp is probed */
@@ -1319,6 +1397,8 @@
 	kmem_cache_destroy(iopte_cachep);
 
 	platform_driver_unregister(&omap_iommu_driver);
+
+	omap_iommu_debugfs_exit();
 }
 module_exit(omap_iommu_exit);
 
diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h
index 4f1b68c..d736630 100644
--- a/drivers/iommu/omap-iommu.h
+++ b/drivers/iommu/omap-iommu.h
@@ -10,9 +10,8 @@
  * published by the Free Software Foundation.
  */
 
-#if defined(CONFIG_ARCH_OMAP1)
-#error "iommu for this processor not implemented yet"
-#endif
+#ifndef _OMAP_IOMMU_H
+#define _OMAP_IOMMU_H
 
 struct iotlb_entry {
 	u32 da;
@@ -30,10 +29,9 @@
 	const char	*name;
 	void __iomem	*regbase;
 	struct device	*dev;
-	void		*isr_priv;
 	struct iommu_domain *domain;
+	struct dentry	*debug_dir;
 
-	unsigned int	refcount;
 	spinlock_t	iommu_lock;	/* global for this whole object */
 
 	/*
@@ -67,34 +65,6 @@
 	};
 };
 
-/* architecture specific functions */
-struct iommu_functions {
-	unsigned long	version;
-
-	int (*enable)(struct omap_iommu *obj);
-	void (*disable)(struct omap_iommu *obj);
-	void (*set_twl)(struct omap_iommu *obj, bool on);
-	u32 (*fault_isr)(struct omap_iommu *obj, u32 *ra);
-
-	void (*tlb_read_cr)(struct omap_iommu *obj, struct cr_regs *cr);
-	void (*tlb_load_cr)(struct omap_iommu *obj, struct cr_regs *cr);
-
-	struct cr_regs *(*alloc_cr)(struct omap_iommu *obj,
-							struct iotlb_entry *e);
-	int (*cr_valid)(struct cr_regs *cr);
-	u32 (*cr_to_virt)(struct cr_regs *cr);
-	void (*cr_to_e)(struct cr_regs *cr, struct iotlb_entry *e);
-	ssize_t (*dump_cr)(struct omap_iommu *obj, struct cr_regs *cr,
-							char *buf);
-
-	u32 (*get_pte_attr)(struct iotlb_entry *e);
-
-	void (*save_ctx)(struct omap_iommu *obj);
-	void (*restore_ctx)(struct omap_iommu *obj);
-	ssize_t (*dump_ctx)(struct omap_iommu *obj, char *buf, ssize_t len);
-};
-
-#ifdef CONFIG_IOMMU_API
 /**
  * dev_to_omap_iommu() - retrieves an omap iommu object from a user device
  * @dev: iommu client device
@@ -105,7 +75,6 @@
 
 	return arch_data->iommu_dev;
 }
-#endif
 
 /*
  * MMU Register offsets
@@ -133,6 +102,28 @@
 /*
  * MMU Register bit definitions
  */
+/* IRQSTATUS & IRQENABLE */
+#define MMU_IRQ_MULTIHITFAULT	(1 << 4)
+#define MMU_IRQ_TABLEWALKFAULT	(1 << 3)
+#define MMU_IRQ_EMUMISS		(1 << 2)
+#define MMU_IRQ_TRANSLATIONFAULT	(1 << 1)
+#define MMU_IRQ_TLBMISS		(1 << 0)
+
+#define __MMU_IRQ_FAULT		\
+	(MMU_IRQ_MULTIHITFAULT | MMU_IRQ_EMUMISS | MMU_IRQ_TRANSLATIONFAULT)
+#define MMU_IRQ_MASK		\
+	(__MMU_IRQ_FAULT | MMU_IRQ_TABLEWALKFAULT | MMU_IRQ_TLBMISS)
+#define MMU_IRQ_TWL_MASK	(__MMU_IRQ_FAULT | MMU_IRQ_TABLEWALKFAULT)
+#define MMU_IRQ_TLB_MISS_MASK	(__MMU_IRQ_FAULT | MMU_IRQ_TLBMISS)
+
+/* MMU_CNTL */
+#define MMU_CNTL_SHIFT		1
+#define MMU_CNTL_MASK		(7 << MMU_CNTL_SHIFT)
+#define MMU_CNTL_EML_TLB	(1 << 3)
+#define MMU_CNTL_TWL_EN		(1 << 2)
+#define MMU_CNTL_MMU_EN		(1 << 1)
+
+/* CAM */
 #define MMU_CAM_VATAG_SHIFT	12
 #define MMU_CAM_VATAG_MASK \
 	((~0UL >> MMU_CAM_VATAG_SHIFT) << MMU_CAM_VATAG_SHIFT)
@@ -144,6 +135,7 @@
 #define MMU_CAM_PGSZ_4K		(2 << 0)
 #define MMU_CAM_PGSZ_16M	(3 << 0)
 
+/* RAM */
 #define MMU_RAM_PADDR_SHIFT	12
 #define MMU_RAM_PADDR_MASK \
 	((~0UL >> MMU_RAM_PADDR_SHIFT) << MMU_RAM_PADDR_SHIFT)
@@ -165,6 +157,12 @@
 
 #define MMU_GP_REG_BUS_ERR_BACK_EN	0x1
 
+#define get_cam_va_mask(pgsz)				\
+	(((pgsz) == MMU_CAM_PGSZ_16M) ? 0xff000000 :	\
+	 ((pgsz) == MMU_CAM_PGSZ_1M)  ? 0xfff00000 :	\
+	 ((pgsz) == MMU_CAM_PGSZ_64K) ? 0xffff0000 :	\
+	 ((pgsz) == MMU_CAM_PGSZ_4K)  ? 0xfffff000 : 0)
+
 /*
  * utilities for super page(16MB, 1MB, 64KB and 4KB)
  */
@@ -192,27 +190,25 @@
 /*
  * global functions
  */
-extern u32 omap_iommu_arch_version(void);
-
-extern void omap_iotlb_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e);
-
-extern int
-omap_iopgtable_store_entry(struct omap_iommu *obj, struct iotlb_entry *e);
-
-extern void omap_iommu_save_ctx(struct device *dev);
-extern void omap_iommu_restore_ctx(struct device *dev);
-
-extern int omap_foreach_iommu_device(void *data,
-				int (*fn)(struct device *, void *));
-
-extern int omap_install_iommu_arch(const struct iommu_functions *ops);
-extern void omap_uninstall_iommu_arch(const struct iommu_functions *ops);
-
+#ifdef CONFIG_OMAP_IOMMU_DEBUG
 extern ssize_t
 omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len);
 extern size_t
 omap_dump_tlb_entries(struct omap_iommu *obj, char *buf, ssize_t len);
 
+void omap_iommu_debugfs_init(void);
+void omap_iommu_debugfs_exit(void);
+
+void omap_iommu_debugfs_add(struct omap_iommu *obj);
+void omap_iommu_debugfs_remove(struct omap_iommu *obj);
+#else
+static inline void omap_iommu_debugfs_init(void) { }
+static inline void omap_iommu_debugfs_exit(void) { }
+
+static inline void omap_iommu_debugfs_add(struct omap_iommu *obj) { }
+static inline void omap_iommu_debugfs_remove(struct omap_iommu *obj) { }
+#endif
+
 /*
  * register accessors
  */
@@ -225,3 +221,5 @@
 {
 	__raw_writel(val, obj->regbase + offs);
 }
+
+#endif /* _OMAP_IOMMU_H */
diff --git a/drivers/iommu/omap-iommu2.c b/drivers/iommu/omap-iommu2.c
deleted file mode 100644
index 5e1ea3b..0000000
--- a/drivers/iommu/omap-iommu2.c
+++ /dev/null
@@ -1,337 +0,0 @@
-/*
- * omap iommu: omap2/3 architecture specific functions
- *
- * Copyright (C) 2008-2009 Nokia Corporation
- *
- * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>,
- *		Paul Mundt and Toshihiro Kobayashi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/err.h>
-#include <linux/device.h>
-#include <linux/io.h>
-#include <linux/jiffies.h>
-#include <linux/module.h>
-#include <linux/omap-iommu.h>
-#include <linux/slab.h>
-#include <linux/stringify.h>
-#include <linux/platform_data/iommu-omap.h>
-
-#include "omap-iommu.h"
-
-/*
- * omap2 architecture specific register bit definitions
- */
-#define IOMMU_ARCH_VERSION	0x00000011
-
-/* IRQSTATUS & IRQENABLE */
-#define MMU_IRQ_MULTIHITFAULT	(1 << 4)
-#define MMU_IRQ_TABLEWALKFAULT	(1 << 3)
-#define MMU_IRQ_EMUMISS		(1 << 2)
-#define MMU_IRQ_TRANSLATIONFAULT	(1 << 1)
-#define MMU_IRQ_TLBMISS		(1 << 0)
-
-#define __MMU_IRQ_FAULT		\
-	(MMU_IRQ_MULTIHITFAULT | MMU_IRQ_EMUMISS | MMU_IRQ_TRANSLATIONFAULT)
-#define MMU_IRQ_MASK		\
-	(__MMU_IRQ_FAULT | MMU_IRQ_TABLEWALKFAULT | MMU_IRQ_TLBMISS)
-#define MMU_IRQ_TWL_MASK	(__MMU_IRQ_FAULT | MMU_IRQ_TABLEWALKFAULT)
-#define MMU_IRQ_TLB_MISS_MASK	(__MMU_IRQ_FAULT | MMU_IRQ_TLBMISS)
-
-/* MMU_CNTL */
-#define MMU_CNTL_SHIFT		1
-#define MMU_CNTL_MASK		(7 << MMU_CNTL_SHIFT)
-#define MMU_CNTL_EML_TLB	(1 << 3)
-#define MMU_CNTL_TWL_EN		(1 << 2)
-#define MMU_CNTL_MMU_EN		(1 << 1)
-
-#define get_cam_va_mask(pgsz)				\
-	(((pgsz) == MMU_CAM_PGSZ_16M) ? 0xff000000 :	\
-	 ((pgsz) == MMU_CAM_PGSZ_1M)  ? 0xfff00000 :	\
-	 ((pgsz) == MMU_CAM_PGSZ_64K) ? 0xffff0000 :	\
-	 ((pgsz) == MMU_CAM_PGSZ_4K)  ? 0xfffff000 : 0)
-
-/* IOMMU errors */
-#define OMAP_IOMMU_ERR_TLB_MISS		(1 << 0)
-#define OMAP_IOMMU_ERR_TRANS_FAULT	(1 << 1)
-#define OMAP_IOMMU_ERR_EMU_MISS		(1 << 2)
-#define OMAP_IOMMU_ERR_TBLWALK_FAULT	(1 << 3)
-#define OMAP_IOMMU_ERR_MULTIHIT_FAULT	(1 << 4)
-
-static void __iommu_set_twl(struct omap_iommu *obj, bool on)
-{
-	u32 l = iommu_read_reg(obj, MMU_CNTL);
-
-	if (on)
-		iommu_write_reg(obj, MMU_IRQ_TWL_MASK, MMU_IRQENABLE);
-	else
-		iommu_write_reg(obj, MMU_IRQ_TLB_MISS_MASK, MMU_IRQENABLE);
-
-	l &= ~MMU_CNTL_MASK;
-	if (on)
-		l |= (MMU_CNTL_MMU_EN | MMU_CNTL_TWL_EN);
-	else
-		l |= (MMU_CNTL_MMU_EN);
-
-	iommu_write_reg(obj, l, MMU_CNTL);
-}
-
-
-static int omap2_iommu_enable(struct omap_iommu *obj)
-{
-	u32 l, pa;
-
-	if (!obj->iopgd || !IS_ALIGNED((u32)obj->iopgd,  SZ_16K))
-		return -EINVAL;
-
-	pa = virt_to_phys(obj->iopgd);
-	if (!IS_ALIGNED(pa, SZ_16K))
-		return -EINVAL;
-
-	l = iommu_read_reg(obj, MMU_REVISION);
-	dev_info(obj->dev, "%s: version %d.%d\n", obj->name,
-		 (l >> 4) & 0xf, l & 0xf);
-
-	iommu_write_reg(obj, pa, MMU_TTB);
-
-	if (obj->has_bus_err_back)
-		iommu_write_reg(obj, MMU_GP_REG_BUS_ERR_BACK_EN, MMU_GP_REG);
-
-	__iommu_set_twl(obj, true);
-
-	return 0;
-}
-
-static void omap2_iommu_disable(struct omap_iommu *obj)
-{
-	u32 l = iommu_read_reg(obj, MMU_CNTL);
-
-	l &= ~MMU_CNTL_MASK;
-	iommu_write_reg(obj, l, MMU_CNTL);
-
-	dev_dbg(obj->dev, "%s is shutting down\n", obj->name);
-}
-
-static void omap2_iommu_set_twl(struct omap_iommu *obj, bool on)
-{
-	__iommu_set_twl(obj, false);
-}
-
-static u32 omap2_iommu_fault_isr(struct omap_iommu *obj, u32 *ra)
-{
-	u32 stat, da;
-	u32 errs = 0;
-
-	stat = iommu_read_reg(obj, MMU_IRQSTATUS);
-	stat &= MMU_IRQ_MASK;
-	if (!stat) {
-		*ra = 0;
-		return 0;
-	}
-
-	da = iommu_read_reg(obj, MMU_FAULT_AD);
-	*ra = da;
-
-	if (stat & MMU_IRQ_TLBMISS)
-		errs |= OMAP_IOMMU_ERR_TLB_MISS;
-	if (stat & MMU_IRQ_TRANSLATIONFAULT)
-		errs |= OMAP_IOMMU_ERR_TRANS_FAULT;
-	if (stat & MMU_IRQ_EMUMISS)
-		errs |= OMAP_IOMMU_ERR_EMU_MISS;
-	if (stat & MMU_IRQ_TABLEWALKFAULT)
-		errs |= OMAP_IOMMU_ERR_TBLWALK_FAULT;
-	if (stat & MMU_IRQ_MULTIHITFAULT)
-		errs |= OMAP_IOMMU_ERR_MULTIHIT_FAULT;
-	iommu_write_reg(obj, stat, MMU_IRQSTATUS);
-
-	return errs;
-}
-
-static void omap2_tlb_read_cr(struct omap_iommu *obj, struct cr_regs *cr)
-{
-	cr->cam = iommu_read_reg(obj, MMU_READ_CAM);
-	cr->ram = iommu_read_reg(obj, MMU_READ_RAM);
-}
-
-static void omap2_tlb_load_cr(struct omap_iommu *obj, struct cr_regs *cr)
-{
-	iommu_write_reg(obj, cr->cam | MMU_CAM_V, MMU_CAM);
-	iommu_write_reg(obj, cr->ram, MMU_RAM);
-}
-
-static u32 omap2_cr_to_virt(struct cr_regs *cr)
-{
-	u32 page_size = cr->cam & MMU_CAM_PGSZ_MASK;
-	u32 mask = get_cam_va_mask(cr->cam & page_size);
-
-	return cr->cam & mask;
-}
-
-static struct cr_regs *omap2_alloc_cr(struct omap_iommu *obj,
-						struct iotlb_entry *e)
-{
-	struct cr_regs *cr;
-
-	if (e->da & ~(get_cam_va_mask(e->pgsz))) {
-		dev_err(obj->dev, "%s:\twrong alignment: %08x\n", __func__,
-			e->da);
-		return ERR_PTR(-EINVAL);
-	}
-
-	cr = kmalloc(sizeof(*cr), GFP_KERNEL);
-	if (!cr)
-		return ERR_PTR(-ENOMEM);
-
-	cr->cam = (e->da & MMU_CAM_VATAG_MASK) | e->prsvd | e->pgsz | e->valid;
-	cr->ram = e->pa | e->endian | e->elsz | e->mixed;
-
-	return cr;
-}
-
-static inline int omap2_cr_valid(struct cr_regs *cr)
-{
-	return cr->cam & MMU_CAM_V;
-}
-
-static u32 omap2_get_pte_attr(struct iotlb_entry *e)
-{
-	u32 attr;
-
-	attr = e->mixed << 5;
-	attr |= e->endian;
-	attr |= e->elsz >> 3;
-	attr <<= (((e->pgsz == MMU_CAM_PGSZ_4K) ||
-			(e->pgsz == MMU_CAM_PGSZ_64K)) ? 0 : 6);
-	return attr;
-}
-
-static ssize_t
-omap2_dump_cr(struct omap_iommu *obj, struct cr_regs *cr, char *buf)
-{
-	char *p = buf;
-
-	/* FIXME: Need more detail analysis of cam/ram */
-	p += sprintf(p, "%08x %08x %01x\n", cr->cam, cr->ram,
-					(cr->cam & MMU_CAM_P) ? 1 : 0);
-
-	return p - buf;
-}
-
-#define pr_reg(name)							\
-	do {								\
-		ssize_t bytes;						\
-		const char *str = "%20s: %08x\n";			\
-		const int maxcol = 32;					\
-		bytes = snprintf(p, maxcol, str, __stringify(name),	\
-				 iommu_read_reg(obj, MMU_##name));	\
-		p += bytes;						\
-		len -= bytes;						\
-		if (len < maxcol)					\
-			goto out;					\
-	} while (0)
-
-static ssize_t
-omap2_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len)
-{
-	char *p = buf;
-
-	pr_reg(REVISION);
-	pr_reg(IRQSTATUS);
-	pr_reg(IRQENABLE);
-	pr_reg(WALKING_ST);
-	pr_reg(CNTL);
-	pr_reg(FAULT_AD);
-	pr_reg(TTB);
-	pr_reg(LOCK);
-	pr_reg(LD_TLB);
-	pr_reg(CAM);
-	pr_reg(RAM);
-	pr_reg(GFLUSH);
-	pr_reg(FLUSH_ENTRY);
-	pr_reg(READ_CAM);
-	pr_reg(READ_RAM);
-	pr_reg(EMU_FAULT_AD);
-out:
-	return p - buf;
-}
-
-static void omap2_iommu_save_ctx(struct omap_iommu *obj)
-{
-	int i;
-	u32 *p = obj->ctx;
-
-	for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
-		p[i] = iommu_read_reg(obj, i * sizeof(u32));
-		dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]);
-	}
-
-	BUG_ON(p[0] != IOMMU_ARCH_VERSION);
-}
-
-static void omap2_iommu_restore_ctx(struct omap_iommu *obj)
-{
-	int i;
-	u32 *p = obj->ctx;
-
-	for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
-		iommu_write_reg(obj, p[i], i * sizeof(u32));
-		dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]);
-	}
-
-	BUG_ON(p[0] != IOMMU_ARCH_VERSION);
-}
-
-static void omap2_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e)
-{
-	e->da		= cr->cam & MMU_CAM_VATAG_MASK;
-	e->pa		= cr->ram & MMU_RAM_PADDR_MASK;
-	e->valid	= cr->cam & MMU_CAM_V;
-	e->pgsz		= cr->cam & MMU_CAM_PGSZ_MASK;
-	e->endian	= cr->ram & MMU_RAM_ENDIAN_MASK;
-	e->elsz		= cr->ram & MMU_RAM_ELSZ_MASK;
-	e->mixed	= cr->ram & MMU_RAM_MIXED;
-}
-
-static const struct iommu_functions omap2_iommu_ops = {
-	.version	= IOMMU_ARCH_VERSION,
-
-	.enable		= omap2_iommu_enable,
-	.disable	= omap2_iommu_disable,
-	.set_twl	= omap2_iommu_set_twl,
-	.fault_isr	= omap2_iommu_fault_isr,
-
-	.tlb_read_cr	= omap2_tlb_read_cr,
-	.tlb_load_cr	= omap2_tlb_load_cr,
-
-	.cr_to_e	= omap2_cr_to_e,
-	.cr_to_virt	= omap2_cr_to_virt,
-	.alloc_cr	= omap2_alloc_cr,
-	.cr_valid	= omap2_cr_valid,
-	.dump_cr	= omap2_dump_cr,
-
-	.get_pte_attr	= omap2_get_pte_attr,
-
-	.save_ctx	= omap2_iommu_save_ctx,
-	.restore_ctx	= omap2_iommu_restore_ctx,
-	.dump_ctx	= omap2_iommu_dump_ctx,
-};
-
-static int __init omap2_iommu_init(void)
-{
-	return omap_install_iommu_arch(&omap2_iommu_ops);
-}
-module_init(omap2_iommu_init);
-
-static void __exit omap2_iommu_exit(void)
-{
-	omap_uninstall_iommu_arch(&omap2_iommu_ops);
-}
-module_exit(omap2_iommu_exit);
-
-MODULE_AUTHOR("Hiroshi DOYU, Paul Mundt and Toshihiro Kobayashi");
-MODULE_DESCRIPTION("omap iommu: omap2/3 architecture specific functions");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
new file mode 100644
index 0000000..b2023af
--- /dev/null
+++ b/drivers/iommu/rockchip-iommu.c
@@ -0,0 +1,1038 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/cacheflush.h>
+#include <asm/pgtable.h>
+#include <linux/compiler.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+/** MMU register offsets */
+#define RK_MMU_DTE_ADDR		0x00	/* Directory table address */
+#define RK_MMU_STATUS		0x04
+#define RK_MMU_COMMAND		0x08
+#define RK_MMU_PAGE_FAULT_ADDR	0x0C	/* IOVA of last page fault */
+#define RK_MMU_ZAP_ONE_LINE	0x10	/* Shootdown one IOTLB entry */
+#define RK_MMU_INT_RAWSTAT	0x14	/* IRQ status ignoring mask */
+#define RK_MMU_INT_CLEAR	0x18	/* Acknowledge and re-arm irq */
+#define RK_MMU_INT_MASK		0x1C	/* IRQ enable */
+#define RK_MMU_INT_STATUS	0x20	/* IRQ status after masking */
+#define RK_MMU_AUTO_GATING	0x24
+
+#define DTE_ADDR_DUMMY		0xCAFEBABE
+#define FORCE_RESET_TIMEOUT	100	/* ms */
+
+/* RK_MMU_STATUS fields */
+#define RK_MMU_STATUS_PAGING_ENABLED       BIT(0)
+#define RK_MMU_STATUS_PAGE_FAULT_ACTIVE    BIT(1)
+#define RK_MMU_STATUS_STALL_ACTIVE         BIT(2)
+#define RK_MMU_STATUS_IDLE                 BIT(3)
+#define RK_MMU_STATUS_REPLAY_BUFFER_EMPTY  BIT(4)
+#define RK_MMU_STATUS_PAGE_FAULT_IS_WRITE  BIT(5)
+#define RK_MMU_STATUS_STALL_NOT_ACTIVE     BIT(31)
+
+/* RK_MMU_COMMAND command values */
+#define RK_MMU_CMD_ENABLE_PAGING    0  /* Enable memory translation */
+#define RK_MMU_CMD_DISABLE_PAGING   1  /* Disable memory translation */
+#define RK_MMU_CMD_ENABLE_STALL     2  /* Stall paging to allow other cmds */
+#define RK_MMU_CMD_DISABLE_STALL    3  /* Stop stall re-enables paging */
+#define RK_MMU_CMD_ZAP_CACHE        4  /* Shoot down entire IOTLB */
+#define RK_MMU_CMD_PAGE_FAULT_DONE  5  /* Clear page fault */
+#define RK_MMU_CMD_FORCE_RESET      6  /* Reset all registers */
+
+/* RK_MMU_INT_* register fields */
+#define RK_MMU_IRQ_PAGE_FAULT    0x01  /* page fault */
+#define RK_MMU_IRQ_BUS_ERROR     0x02  /* bus read error */
+#define RK_MMU_IRQ_MASK          (RK_MMU_IRQ_PAGE_FAULT | RK_MMU_IRQ_BUS_ERROR)
+
+#define NUM_DT_ENTRIES 1024
+#define NUM_PT_ENTRIES 1024
+
+#define SPAGE_ORDER 12
+#define SPAGE_SIZE (1 << SPAGE_ORDER)
+
+ /*
+  * Support mapping any size that fits in one page table:
+  *   4 KiB to 4 MiB
+  */
+#define RK_IOMMU_PGSIZE_BITMAP 0x007ff000
+
+#define IOMMU_REG_POLL_COUNT_FAST 1000
+
+struct rk_iommu_domain {
+	struct list_head iommus;
+	u32 *dt; /* page directory table */
+	spinlock_t iommus_lock; /* lock for iommus list */
+	spinlock_t dt_lock; /* lock for modifying page directory table */
+};
+
+struct rk_iommu {
+	struct device *dev;
+	void __iomem *base;
+	int irq;
+	struct list_head node; /* entry in rk_iommu_domain.iommus */
+	struct iommu_domain *domain; /* domain to which iommu is attached */
+};
+
+static inline void rk_table_flush(u32 *va, unsigned int count)
+{
+	phys_addr_t pa_start = virt_to_phys(va);
+	phys_addr_t pa_end = virt_to_phys(va + count);
+	size_t size = pa_end - pa_start;
+
+	__cpuc_flush_dcache_area(va, size);
+	outer_flush_range(pa_start, pa_end);
+}
+
+/**
+ * Inspired by _wait_for in intel_drv.h
+ * This is NOT safe for use in interrupt context.
+ *
+ * Note that it's important that we check the condition again after having
+ * timed out, since the timeout could be due to preemption or similar and
+ * we've never had a chance to check the condition before the timeout.
+ */
+#define rk_wait_for(COND, MS) ({ \
+	unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1;	\
+	int ret__ = 0;							\
+	while (!(COND)) {						\
+		if (time_after(jiffies, timeout__)) {			\
+			ret__ = (COND) ? 0 : -ETIMEDOUT;		\
+			break;						\
+		}							\
+		usleep_range(50, 100);					\
+	}								\
+	ret__;								\
+})
+
+/*
+ * The Rockchip rk3288 iommu uses a 2-level page table.
+ * The first level is the "Directory Table" (DT).
+ * The DT consists of 1024 4-byte Directory Table Entries (DTEs), each pointing
+ * to a "Page Table".
+ * The second level is the 1024 Page Tables (PT).
+ * Each PT consists of 1024 4-byte Page Table Entries (PTEs), each pointing to
+ * a 4 KB page of physical memory.
+ *
+ * The DT and each PT fits in a single 4 KB page (4-bytes * 1024 entries).
+ * Each iommu device has a MMU_DTE_ADDR register that contains the physical
+ * address of the start of the DT page.
+ *
+ * The structure of the page table is as follows:
+ *
+ *                   DT
+ * MMU_DTE_ADDR -> +-----+
+ *                 |     |
+ *                 +-----+     PT
+ *                 | DTE | -> +-----+
+ *                 +-----+    |     |     Memory
+ *                 |     |    +-----+     Page
+ *                 |     |    | PTE | -> +-----+
+ *                 +-----+    +-----+    |     |
+ *                            |     |    |     |
+ *                            |     |    |     |
+ *                            +-----+    |     |
+ *                                       |     |
+ *                                       |     |
+ *                                       +-----+
+ */
+
+/*
+ * Each DTE has a PT address and a valid bit:
+ * +---------------------+-----------+-+
+ * | PT address          | Reserved  |V|
+ * +---------------------+-----------+-+
+ *  31:12 - PT address (PTs always start on a 4 KB boundary)
+ *  11: 1 - Reserved
+ *      0 - 1 if PT @ PT address is valid
+ */
+#define RK_DTE_PT_ADDRESS_MASK    0xfffff000
+#define RK_DTE_PT_VALID           BIT(0)
+
+static inline phys_addr_t rk_dte_pt_address(u32 dte)
+{
+	return (phys_addr_t)dte & RK_DTE_PT_ADDRESS_MASK;
+}
+
+static inline bool rk_dte_is_pt_valid(u32 dte)
+{
+	return dte & RK_DTE_PT_VALID;
+}
+
+static u32 rk_mk_dte(u32 *pt)
+{
+	phys_addr_t pt_phys = virt_to_phys(pt);
+	return (pt_phys & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID;
+}
+
+/*
+ * Each PTE has a Page address, some flags and a valid bit:
+ * +---------------------+---+-------+-+
+ * | Page address        |Rsv| Flags |V|
+ * +---------------------+---+-------+-+
+ *  31:12 - Page address (Pages always start on a 4 KB boundary)
+ *  11: 9 - Reserved
+ *   8: 1 - Flags
+ *      8 - Read allocate - allocate cache space on read misses
+ *      7 - Read cache - enable cache & prefetch of data
+ *      6 - Write buffer - enable delaying writes on their way to memory
+ *      5 - Write allocate - allocate cache space on write misses
+ *      4 - Write cache - different writes can be merged together
+ *      3 - Override cache attributes
+ *          if 1, bits 4-8 control cache attributes
+ *          if 0, the system bus defaults are used
+ *      2 - Writable
+ *      1 - Readable
+ *      0 - 1 if Page @ Page address is valid
+ */
+#define RK_PTE_PAGE_ADDRESS_MASK  0xfffff000
+#define RK_PTE_PAGE_FLAGS_MASK    0x000001fe
+#define RK_PTE_PAGE_WRITABLE      BIT(2)
+#define RK_PTE_PAGE_READABLE      BIT(1)
+#define RK_PTE_PAGE_VALID         BIT(0)
+
+static inline phys_addr_t rk_pte_page_address(u32 pte)
+{
+	return (phys_addr_t)pte & RK_PTE_PAGE_ADDRESS_MASK;
+}
+
+static inline bool rk_pte_is_page_valid(u32 pte)
+{
+	return pte & RK_PTE_PAGE_VALID;
+}
+
+/* TODO: set cache flags per prot IOMMU_CACHE */
+static u32 rk_mk_pte(phys_addr_t page, int prot)
+{
+	u32 flags = 0;
+	flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE : 0;
+	flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE : 0;
+	page &= RK_PTE_PAGE_ADDRESS_MASK;
+	return page | flags | RK_PTE_PAGE_VALID;
+}
+
+static u32 rk_mk_pte_invalid(u32 pte)
+{
+	return pte & ~RK_PTE_PAGE_VALID;
+}
+
+/*
+ * rk3288 iova (IOMMU Virtual Address) format
+ *  31       22.21       12.11          0
+ * +-----------+-----------+-------------+
+ * | DTE index | PTE index | Page offset |
+ * +-----------+-----------+-------------+
+ *  31:22 - DTE index   - index of DTE in DT
+ *  21:12 - PTE index   - index of PTE in PT @ DTE.pt_address
+ *  11: 0 - Page offset - offset into page @ PTE.page_address
+ */
+#define RK_IOVA_DTE_MASK    0xffc00000
+#define RK_IOVA_DTE_SHIFT   22
+#define RK_IOVA_PTE_MASK    0x003ff000
+#define RK_IOVA_PTE_SHIFT   12
+#define RK_IOVA_PAGE_MASK   0x00000fff
+#define RK_IOVA_PAGE_SHIFT  0
+
+static u32 rk_iova_dte_index(dma_addr_t iova)
+{
+	return (u32)(iova & RK_IOVA_DTE_MASK) >> RK_IOVA_DTE_SHIFT;
+}
+
+static u32 rk_iova_pte_index(dma_addr_t iova)
+{
+	return (u32)(iova & RK_IOVA_PTE_MASK) >> RK_IOVA_PTE_SHIFT;
+}
+
+static u32 rk_iova_page_offset(dma_addr_t iova)
+{
+	return (u32)(iova & RK_IOVA_PAGE_MASK) >> RK_IOVA_PAGE_SHIFT;
+}
+
+static u32 rk_iommu_read(struct rk_iommu *iommu, u32 offset)
+{
+	return readl(iommu->base + offset);
+}
+
+static void rk_iommu_write(struct rk_iommu *iommu, u32 offset, u32 value)
+{
+	writel(value, iommu->base + offset);
+}
+
+static void rk_iommu_command(struct rk_iommu *iommu, u32 command)
+{
+	writel(command, iommu->base + RK_MMU_COMMAND);
+}
+
+static void rk_iommu_zap_lines(struct rk_iommu *iommu, dma_addr_t iova,
+			       size_t size)
+{
+	dma_addr_t iova_end = iova + size;
+	/*
+	 * TODO(djkurtz): Figure out when it is more efficient to shootdown the
+	 * entire iotlb rather than iterate over individual iovas.
+	 */
+	for (; iova < iova_end; iova += SPAGE_SIZE)
+		rk_iommu_write(iommu, RK_MMU_ZAP_ONE_LINE, iova);
+}
+
+static bool rk_iommu_is_stall_active(struct rk_iommu *iommu)
+{
+	return rk_iommu_read(iommu, RK_MMU_STATUS) & RK_MMU_STATUS_STALL_ACTIVE;
+}
+
+static bool rk_iommu_is_paging_enabled(struct rk_iommu *iommu)
+{
+	return rk_iommu_read(iommu, RK_MMU_STATUS) &
+			     RK_MMU_STATUS_PAGING_ENABLED;
+}
+
+static int rk_iommu_enable_stall(struct rk_iommu *iommu)
+{
+	int ret;
+
+	if (rk_iommu_is_stall_active(iommu))
+		return 0;
+
+	/* Stall can only be enabled if paging is enabled */
+	if (!rk_iommu_is_paging_enabled(iommu))
+		return 0;
+
+	rk_iommu_command(iommu, RK_MMU_CMD_ENABLE_STALL);
+
+	ret = rk_wait_for(rk_iommu_is_stall_active(iommu), 1);
+	if (ret)
+		dev_err(iommu->dev, "Enable stall request timed out, status: %#08x\n",
+			rk_iommu_read(iommu, RK_MMU_STATUS));
+
+	return ret;
+}
+
+static int rk_iommu_disable_stall(struct rk_iommu *iommu)
+{
+	int ret;
+
+	if (!rk_iommu_is_stall_active(iommu))
+		return 0;
+
+	rk_iommu_command(iommu, RK_MMU_CMD_DISABLE_STALL);
+
+	ret = rk_wait_for(!rk_iommu_is_stall_active(iommu), 1);
+	if (ret)
+		dev_err(iommu->dev, "Disable stall request timed out, status: %#08x\n",
+			rk_iommu_read(iommu, RK_MMU_STATUS));
+
+	return ret;
+}
+
+static int rk_iommu_enable_paging(struct rk_iommu *iommu)
+{
+	int ret;
+
+	if (rk_iommu_is_paging_enabled(iommu))
+		return 0;
+
+	rk_iommu_command(iommu, RK_MMU_CMD_ENABLE_PAGING);
+
+	ret = rk_wait_for(rk_iommu_is_paging_enabled(iommu), 1);
+	if (ret)
+		dev_err(iommu->dev, "Enable paging request timed out, status: %#08x\n",
+			rk_iommu_read(iommu, RK_MMU_STATUS));
+
+	return ret;
+}
+
+static int rk_iommu_disable_paging(struct rk_iommu *iommu)
+{
+	int ret;
+
+	if (!rk_iommu_is_paging_enabled(iommu))
+		return 0;
+
+	rk_iommu_command(iommu, RK_MMU_CMD_DISABLE_PAGING);
+
+	ret = rk_wait_for(!rk_iommu_is_paging_enabled(iommu), 1);
+	if (ret)
+		dev_err(iommu->dev, "Disable paging request timed out, status: %#08x\n",
+			rk_iommu_read(iommu, RK_MMU_STATUS));
+
+	return ret;
+}
+
+static int rk_iommu_force_reset(struct rk_iommu *iommu)
+{
+	int ret;
+	u32 dte_addr;
+
+	/*
+	 * Check if register DTE_ADDR is working by writing DTE_ADDR_DUMMY
+	 * and verifying that the upper 5 nybbles are read back.
+	 */
+	rk_iommu_write(iommu, RK_MMU_DTE_ADDR, DTE_ADDR_DUMMY);
+
+	dte_addr = rk_iommu_read(iommu, RK_MMU_DTE_ADDR);
+	if (dte_addr != (DTE_ADDR_DUMMY & RK_DTE_PT_ADDRESS_MASK)) {
+		dev_err(iommu->dev, "Error during raw reset. MMU_DTE_ADDR is not functioning\n");
+		return -EFAULT;
+	}
+
+	rk_iommu_command(iommu, RK_MMU_CMD_FORCE_RESET);
+
+	ret = rk_wait_for(rk_iommu_read(iommu, RK_MMU_DTE_ADDR) == 0x00000000,
+			  FORCE_RESET_TIMEOUT);
+	if (ret)
+		dev_err(iommu->dev, "FORCE_RESET command timed out\n");
+
+	return ret;
+}
+
+static void log_iova(struct rk_iommu *iommu, dma_addr_t iova)
+{
+	u32 dte_index, pte_index, page_offset;
+	u32 mmu_dte_addr;
+	phys_addr_t mmu_dte_addr_phys, dte_addr_phys;
+	u32 *dte_addr;
+	u32 dte;
+	phys_addr_t pte_addr_phys = 0;
+	u32 *pte_addr = NULL;
+	u32 pte = 0;
+	phys_addr_t page_addr_phys = 0;
+	u32 page_flags = 0;
+
+	dte_index = rk_iova_dte_index(iova);
+	pte_index = rk_iova_pte_index(iova);
+	page_offset = rk_iova_page_offset(iova);
+
+	mmu_dte_addr = rk_iommu_read(iommu, RK_MMU_DTE_ADDR);
+	mmu_dte_addr_phys = (phys_addr_t)mmu_dte_addr;
+
+	dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index);
+	dte_addr = phys_to_virt(dte_addr_phys);
+	dte = *dte_addr;
+
+	if (!rk_dte_is_pt_valid(dte))
+		goto print_it;
+
+	pte_addr_phys = rk_dte_pt_address(dte) + (pte_index * 4);
+	pte_addr = phys_to_virt(pte_addr_phys);
+	pte = *pte_addr;
+
+	if (!rk_pte_is_page_valid(pte))
+		goto print_it;
+
+	page_addr_phys = rk_pte_page_address(pte) + page_offset;
+	page_flags = pte & RK_PTE_PAGE_FLAGS_MASK;
+
+print_it:
+	dev_err(iommu->dev, "iova = %pad: dte_index: %#03x pte_index: %#03x page_offset: %#03x\n",
+		&iova, dte_index, pte_index, page_offset);
+	dev_err(iommu->dev, "mmu_dte_addr: %pa dte@%pa: %#08x valid: %u pte@%pa: %#08x valid: %u page@%pa flags: %#03x\n",
+		&mmu_dte_addr_phys, &dte_addr_phys, dte,
+		rk_dte_is_pt_valid(dte), &pte_addr_phys, pte,
+		rk_pte_is_page_valid(pte), &page_addr_phys, page_flags);
+}
+
+static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
+{
+	struct rk_iommu *iommu = dev_id;
+	u32 status;
+	u32 int_status;
+	dma_addr_t iova;
+
+	int_status = rk_iommu_read(iommu, RK_MMU_INT_STATUS);
+	if (int_status == 0)
+		return IRQ_NONE;
+
+	iova = rk_iommu_read(iommu, RK_MMU_PAGE_FAULT_ADDR);
+
+	if (int_status & RK_MMU_IRQ_PAGE_FAULT) {
+		int flags;
+
+		status = rk_iommu_read(iommu, RK_MMU_STATUS);
+		flags = (status & RK_MMU_STATUS_PAGE_FAULT_IS_WRITE) ?
+				IOMMU_FAULT_WRITE : IOMMU_FAULT_READ;
+
+		dev_err(iommu->dev, "Page fault at %pad of type %s\n",
+			&iova,
+			(flags == IOMMU_FAULT_WRITE) ? "write" : "read");
+
+		log_iova(iommu, iova);
+
+		/*
+		 * Report page fault to any installed handlers.
+		 * Ignore the return code, though, since we always zap cache
+		 * and clear the page fault anyway.
+		 */
+		if (iommu->domain)
+			report_iommu_fault(iommu->domain, iommu->dev, iova,
+					   flags);
+		else
+			dev_err(iommu->dev, "Page fault while iommu not attached to domain?\n");
+
+		rk_iommu_command(iommu, RK_MMU_CMD_ZAP_CACHE);
+		rk_iommu_command(iommu, RK_MMU_CMD_PAGE_FAULT_DONE);
+	}
+
+	if (int_status & RK_MMU_IRQ_BUS_ERROR)
+		dev_err(iommu->dev, "BUS_ERROR occurred at %pad\n", &iova);
+
+	if (int_status & ~RK_MMU_IRQ_MASK)
+		dev_err(iommu->dev, "unexpected int_status: %#08x\n",
+			int_status);
+
+	rk_iommu_write(iommu, RK_MMU_INT_CLEAR, int_status);
+
+	return IRQ_HANDLED;
+}
+
+static phys_addr_t rk_iommu_iova_to_phys(struct iommu_domain *domain,
+					 dma_addr_t iova)
+{
+	struct rk_iommu_domain *rk_domain = domain->priv;
+	unsigned long flags;
+	phys_addr_t pt_phys, phys = 0;
+	u32 dte, pte;
+	u32 *page_table;
+
+	spin_lock_irqsave(&rk_domain->dt_lock, flags);
+
+	dte = rk_domain->dt[rk_iova_dte_index(iova)];
+	if (!rk_dte_is_pt_valid(dte))
+		goto out;
+
+	pt_phys = rk_dte_pt_address(dte);
+	page_table = (u32 *)phys_to_virt(pt_phys);
+	pte = page_table[rk_iova_pte_index(iova)];
+	if (!rk_pte_is_page_valid(pte))
+		goto out;
+
+	phys = rk_pte_page_address(pte) + rk_iova_page_offset(iova);
+out:
+	spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
+
+	return phys;
+}
+
+static void rk_iommu_zap_iova(struct rk_iommu_domain *rk_domain,
+			      dma_addr_t iova, size_t size)
+{
+	struct list_head *pos;
+	unsigned long flags;
+
+	/* shootdown these iova from all iommus using this domain */
+	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
+	list_for_each(pos, &rk_domain->iommus) {
+		struct rk_iommu *iommu;
+		iommu = list_entry(pos, struct rk_iommu, node);
+		rk_iommu_zap_lines(iommu, iova, size);
+	}
+	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
+}
+
+static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
+				  dma_addr_t iova)
+{
+	u32 *page_table, *dte_addr;
+	u32 dte;
+	phys_addr_t pt_phys;
+
+	assert_spin_locked(&rk_domain->dt_lock);
+
+	dte_addr = &rk_domain->dt[rk_iova_dte_index(iova)];
+	dte = *dte_addr;
+	if (rk_dte_is_pt_valid(dte))
+		goto done;
+
+	page_table = (u32 *)get_zeroed_page(GFP_ATOMIC | GFP_DMA32);
+	if (!page_table)
+		return ERR_PTR(-ENOMEM);
+
+	dte = rk_mk_dte(page_table);
+	*dte_addr = dte;
+
+	rk_table_flush(page_table, NUM_PT_ENTRIES);
+	rk_table_flush(dte_addr, 1);
+
+	/*
+	 * Zap the first iova of the newly allocated page table so the iommu
+	 * evicts any stale cached value for the new dte from its iotlb.
+	 */
+	rk_iommu_zap_iova(rk_domain, iova, SPAGE_SIZE);
+
+done:
+	pt_phys = rk_dte_pt_address(dte);
+	return (u32 *)phys_to_virt(pt_phys);
+}
+
+static size_t rk_iommu_unmap_iova(struct rk_iommu_domain *rk_domain,
+				  u32 *pte_addr, dma_addr_t iova, size_t size)
+{
+	unsigned int pte_count;
+	unsigned int pte_total = size / SPAGE_SIZE;
+
+	assert_spin_locked(&rk_domain->dt_lock);
+
+	for (pte_count = 0; pte_count < pte_total; pte_count++) {
+		u32 pte = pte_addr[pte_count];
+		if (!rk_pte_is_page_valid(pte))
+			break;
+
+		pte_addr[pte_count] = rk_mk_pte_invalid(pte);
+	}
+
+	rk_table_flush(pte_addr, pte_count);
+
+	return pte_count * SPAGE_SIZE;
+}
+
+static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr,
+			     dma_addr_t iova, phys_addr_t paddr, size_t size,
+			     int prot)
+{
+	unsigned int pte_count;
+	unsigned int pte_total = size / SPAGE_SIZE;
+	phys_addr_t page_phys;
+
+	assert_spin_locked(&rk_domain->dt_lock);
+
+	for (pte_count = 0; pte_count < pte_total; pte_count++) {
+		u32 pte = pte_addr[pte_count];
+
+		if (rk_pte_is_page_valid(pte))
+			goto unwind;
+
+		pte_addr[pte_count] = rk_mk_pte(paddr, prot);
+
+		paddr += SPAGE_SIZE;
+	}
+
+	rk_table_flush(pte_addr, pte_count);
+
+	return 0;
+unwind:
+	/* Unmap the range of iovas that we just mapped */
+	rk_iommu_unmap_iova(rk_domain, pte_addr, iova, pte_count * SPAGE_SIZE);
+
+	iova += pte_count * SPAGE_SIZE;
+	page_phys = rk_pte_page_address(pte_addr[pte_count]);
+	pr_err("iova: %pad already mapped to %pa cannot remap to phys: %pa prot: %#x\n",
+	       &iova, &page_phys, &paddr, prot);
+
+	return -EADDRINUSE;
+}
+
+static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
+			phys_addr_t paddr, size_t size, int prot)
+{
+	struct rk_iommu_domain *rk_domain = domain->priv;
+	unsigned long flags;
+	dma_addr_t iova = (dma_addr_t)_iova;
+	u32 *page_table, *pte_addr;
+	int ret;
+
+	spin_lock_irqsave(&rk_domain->dt_lock, flags);
+
+	/*
+	 * pgsize_bitmap specifies iova sizes that fit in one page table
+	 * (1024 4-KiB pages = 4 MiB).
+	 * So, size will always be 4096 <= size <= 4194304.
+	 * Since iommu_map() guarantees that both iova and size will be
+	 * aligned, we will always only be mapping from a single dte here.
+	 */
+	page_table = rk_dte_get_page_table(rk_domain, iova);
+	if (IS_ERR(page_table)) {
+		spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
+		return PTR_ERR(page_table);
+	}
+
+	pte_addr = &page_table[rk_iova_pte_index(iova)];
+	ret = rk_iommu_map_iova(rk_domain, pte_addr, iova, paddr, size, prot);
+	spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
+
+	return ret;
+}
+
+static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
+			     size_t size)
+{
+	struct rk_iommu_domain *rk_domain = domain->priv;
+	unsigned long flags;
+	dma_addr_t iova = (dma_addr_t)_iova;
+	phys_addr_t pt_phys;
+	u32 dte;
+	u32 *pte_addr;
+	size_t unmap_size;
+
+	spin_lock_irqsave(&rk_domain->dt_lock, flags);
+
+	/*
+	 * pgsize_bitmap specifies iova sizes that fit in one page table
+	 * (1024 4-KiB pages = 4 MiB).
+	 * So, size will always be 4096 <= size <= 4194304.
+	 * Since iommu_unmap() guarantees that both iova and size will be
+	 * aligned, we will always only be unmapping from a single dte here.
+	 */
+	dte = rk_domain->dt[rk_iova_dte_index(iova)];
+	/* Just return 0 if iova is unmapped */
+	if (!rk_dte_is_pt_valid(dte)) {
+		spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
+		return 0;
+	}
+
+	pt_phys = rk_dte_pt_address(dte);
+	pte_addr = (u32 *)phys_to_virt(pt_phys) + rk_iova_pte_index(iova);
+	unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, iova, size);
+
+	spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
+
+	/* Shootdown iotlb entries for iova range that was just unmapped */
+	rk_iommu_zap_iova(rk_domain, iova, unmap_size);
+
+	return unmap_size;
+}
+
+static struct rk_iommu *rk_iommu_from_dev(struct device *dev)
+{
+	struct iommu_group *group;
+	struct device *iommu_dev;
+	struct rk_iommu *rk_iommu;
+
+	group = iommu_group_get(dev);
+	if (!group)
+		return NULL;
+	iommu_dev = iommu_group_get_iommudata(group);
+	rk_iommu = dev_get_drvdata(iommu_dev);
+	iommu_group_put(group);
+
+	return rk_iommu;
+}
+
+static int rk_iommu_attach_device(struct iommu_domain *domain,
+				  struct device *dev)
+{
+	struct rk_iommu *iommu;
+	struct rk_iommu_domain *rk_domain = domain->priv;
+	unsigned long flags;
+	int ret;
+	phys_addr_t dte_addr;
+
+	/*
+	 * Allow 'virtual devices' (e.g., drm) to attach to domain.
+	 * Such a device does not belong to an iommu group.
+	 */
+	iommu = rk_iommu_from_dev(dev);
+	if (!iommu)
+		return 0;
+
+	ret = rk_iommu_enable_stall(iommu);
+	if (ret)
+		return ret;
+
+	ret = rk_iommu_force_reset(iommu);
+	if (ret)
+		return ret;
+
+	iommu->domain = domain;
+
+	ret = devm_request_irq(dev, iommu->irq, rk_iommu_irq,
+			       IRQF_SHARED, dev_name(dev), iommu);
+	if (ret)
+		return ret;
+
+	dte_addr = virt_to_phys(rk_domain->dt);
+	rk_iommu_write(iommu, RK_MMU_DTE_ADDR, dte_addr);
+	rk_iommu_command(iommu, RK_MMU_CMD_ZAP_CACHE);
+	rk_iommu_write(iommu, RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
+
+	ret = rk_iommu_enable_paging(iommu);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
+	list_add_tail(&iommu->node, &rk_domain->iommus);
+	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
+
+	dev_info(dev, "Attached to iommu domain\n");
+
+	rk_iommu_disable_stall(iommu);
+
+	return 0;
+}
+
+static void rk_iommu_detach_device(struct iommu_domain *domain,
+				   struct device *dev)
+{
+	struct rk_iommu *iommu;
+	struct rk_iommu_domain *rk_domain = domain->priv;
+	unsigned long flags;
+
+	/* Allow 'virtual devices' (e.g., drm) to detach from domain */
+	iommu = rk_iommu_from_dev(dev);
+	if (!iommu)
+		return;
+
+	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
+	list_del_init(&iommu->node);
+	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
+
+	/* Ignore error while disabling, just keep going */
+	rk_iommu_enable_stall(iommu);
+	rk_iommu_disable_paging(iommu);
+	rk_iommu_write(iommu, RK_MMU_INT_MASK, 0);
+	rk_iommu_write(iommu, RK_MMU_DTE_ADDR, 0);
+	rk_iommu_disable_stall(iommu);
+
+	devm_free_irq(dev, iommu->irq, iommu);
+
+	iommu->domain = NULL;
+
+	dev_info(dev, "Detached from iommu domain\n");
+}
+
+static int rk_iommu_domain_init(struct iommu_domain *domain)
+{
+	struct rk_iommu_domain *rk_domain;
+
+	rk_domain = kzalloc(sizeof(*rk_domain), GFP_KERNEL);
+	if (!rk_domain)
+		return -ENOMEM;
+
+	/*
+	 * rk32xx iommus use a 2 level pagetable.
+	 * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries.
+	 * Allocate one 4 KiB page for each table.
+	 */
+	rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32);
+	if (!rk_domain->dt)
+		goto err_dt;
+
+	rk_table_flush(rk_domain->dt, NUM_DT_ENTRIES);
+
+	spin_lock_init(&rk_domain->iommus_lock);
+	spin_lock_init(&rk_domain->dt_lock);
+	INIT_LIST_HEAD(&rk_domain->iommus);
+
+	domain->priv = rk_domain;
+
+	return 0;
+err_dt:
+	kfree(rk_domain);
+	return -ENOMEM;
+}
+
+static void rk_iommu_domain_destroy(struct iommu_domain *domain)
+{
+	struct rk_iommu_domain *rk_domain = domain->priv;
+	int i;
+
+	WARN_ON(!list_empty(&rk_domain->iommus));
+
+	for (i = 0; i < NUM_DT_ENTRIES; i++) {
+		u32 dte = rk_domain->dt[i];
+		if (rk_dte_is_pt_valid(dte)) {
+			phys_addr_t pt_phys = rk_dte_pt_address(dte);
+			u32 *page_table = phys_to_virt(pt_phys);
+			free_page((unsigned long)page_table);
+		}
+	}
+
+	free_page((unsigned long)rk_domain->dt);
+	kfree(domain->priv);
+	domain->priv = NULL;
+}
+
+static bool rk_iommu_is_dev_iommu_master(struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	int ret;
+
+	/*
+	 * An iommu master has an iommus property containing a list of phandles
+	 * to iommu nodes, each with an #iommu-cells property with value 0.
+	 */
+	ret = of_count_phandle_with_args(np, "iommus", "#iommu-cells");
+	return (ret > 0);
+}
+
+static int rk_iommu_group_set_iommudata(struct iommu_group *group,
+					struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	struct platform_device *pd;
+	int ret;
+	struct of_phandle_args args;
+
+	/*
+	 * An iommu master has an iommus property containing a list of phandles
+	 * to iommu nodes, each with an #iommu-cells property with value 0.
+	 */
+	ret = of_parse_phandle_with_args(np, "iommus", "#iommu-cells", 0,
+					 &args);
+	if (ret) {
+		dev_err(dev, "of_parse_phandle_with_args(%s) => %d\n",
+			np->full_name, ret);
+		return ret;
+	}
+	if (args.args_count != 0) {
+		dev_err(dev, "incorrect number of iommu params found for %s (found %d, expected 0)\n",
+			args.np->full_name, args.args_count);
+		return -EINVAL;
+	}
+
+	pd = of_find_device_by_node(args.np);
+	of_node_put(args.np);
+	if (!pd) {
+		dev_err(dev, "iommu %s not found\n", args.np->full_name);
+		return -EPROBE_DEFER;
+	}
+
+	/* TODO(djkurtz): handle multiple slave iommus for a single master */
+	iommu_group_set_iommudata(group, &pd->dev, NULL);
+
+	return 0;
+}
+
+static int rk_iommu_add_device(struct device *dev)
+{
+	struct iommu_group *group;
+	int ret;
+
+	if (!rk_iommu_is_dev_iommu_master(dev))
+		return -ENODEV;
+
+	group = iommu_group_get(dev);
+	if (!group) {
+		group = iommu_group_alloc();
+		if (IS_ERR(group)) {
+			dev_err(dev, "Failed to allocate IOMMU group\n");
+			return PTR_ERR(group);
+		}
+	}
+
+	ret = iommu_group_add_device(group, dev);
+	if (ret)
+		goto err_put_group;
+
+	ret = rk_iommu_group_set_iommudata(group, dev);
+	if (ret)
+		goto err_remove_device;
+
+	iommu_group_put(group);
+
+	return 0;
+
+err_remove_device:
+	iommu_group_remove_device(dev);
+err_put_group:
+	iommu_group_put(group);
+	return ret;
+}
+
+static void rk_iommu_remove_device(struct device *dev)
+{
+	if (!rk_iommu_is_dev_iommu_master(dev))
+		return;
+
+	iommu_group_remove_device(dev);
+}
+
+static const struct iommu_ops rk_iommu_ops = {
+	.domain_init = rk_iommu_domain_init,
+	.domain_destroy = rk_iommu_domain_destroy,
+	.attach_dev = rk_iommu_attach_device,
+	.detach_dev = rk_iommu_detach_device,
+	.map = rk_iommu_map,
+	.unmap = rk_iommu_unmap,
+	.add_device = rk_iommu_add_device,
+	.remove_device = rk_iommu_remove_device,
+	.iova_to_phys = rk_iommu_iova_to_phys,
+	.pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP,
+};
+
+static int rk_iommu_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rk_iommu *iommu;
+	struct resource *res;
+
+	iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
+	if (!iommu)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, iommu);
+	iommu->dev = dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	iommu->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(iommu->base))
+		return PTR_ERR(iommu->base);
+
+	iommu->irq = platform_get_irq(pdev, 0);
+	if (iommu->irq < 0) {
+		dev_err(dev, "Failed to get IRQ, %d\n", iommu->irq);
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+static int rk_iommu_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id rk_iommu_dt_ids[] = {
+	{ .compatible = "rockchip,iommu" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, rk_iommu_dt_ids);
+#endif
+
+static struct platform_driver rk_iommu_driver = {
+	.probe = rk_iommu_probe,
+	.remove = rk_iommu_remove,
+	.driver = {
+		   .name = "rk_iommu",
+		   .owner = THIS_MODULE,
+		   .of_match_table = of_match_ptr(rk_iommu_dt_ids),
+	},
+};
+
+static int __init rk_iommu_init(void)
+{
+	int ret;
+
+	ret = bus_set_iommu(&platform_bus_type, &rk_iommu_ops);
+	if (ret)
+		return ret;
+
+	return platform_driver_register(&rk_iommu_driver);
+}
+static void __exit rk_iommu_exit(void)
+{
+	platform_driver_unregister(&rk_iommu_driver);
+}
+
+subsys_initcall(rk_iommu_init);
+module_exit(rk_iommu_exit);
+
+MODULE_DESCRIPTION("IOMMU API for Rockchip");
+MODULE_AUTHOR("Simon Xue <xxm@rock-chips.com> and Daniel Kurtz <djkurtz@chromium.org>");
+MODULE_ALIAS("platform:rockchip-iommu");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 4f4c2a7..feb29c4 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -684,10 +684,9 @@
 					 struct fixed_phy_status *status)
 {
 	struct bcm_sf2_priv *priv = ds_to_priv(ds);
-	u32 link, duplex, pause, speed;
+	u32 duplex, pause, speed;
 	u32 reg;
 
-	link = core_readl(priv, CORE_LNKSTS);
 	duplex = core_readl(priv, CORE_DUPSTS);
 	pause = core_readl(priv, CORE_PAUSESTS);
 	speed = core_readl(priv, CORE_SPDSTS);
@@ -701,22 +700,26 @@
 	 * which means that we need to force the link at the port override
 	 * level to get the data to flow. We do use what the interrupt handler
 	 * did determine before.
+	 *
+	 * For the other ports, we just force the link status, since this is
+	 * a fixed PHY device.
 	 */
 	if (port == 7) {
 		status->link = priv->port_sts[port].link;
-		reg = core_readl(priv, CORE_STS_OVERRIDE_GMIIP_PORT(7));
-		reg |= SW_OVERRIDE;
-		if (status->link)
-			reg |= LINK_STS;
-		else
-			reg &= ~LINK_STS;
-		core_writel(priv, reg, CORE_STS_OVERRIDE_GMIIP_PORT(7));
 		status->duplex = 1;
 	} else {
-		status->link = !!(link & (1 << port));
+		status->link = 1;
 		status->duplex = !!(duplex & (1 << port));
 	}
 
+	reg = core_readl(priv, CORE_STS_OVERRIDE_GMIIP_PORT(port));
+	reg |= SW_OVERRIDE;
+	if (status->link)
+		reg |= LINK_STS;
+	else
+		reg &= ~LINK_STS;
+	core_writel(priv, reg, CORE_STS_OVERRIDE_GMIIP_PORT(port));
+
 	switch (speed) {
 	case SPDSTS_10:
 		status->speed = SPEED_10;
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index b6bc318..0987d2a 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -66,23 +66,25 @@
 	return index & (TX_RING_SIZE - 1);
 }
 
-static struct macb_dma_desc *macb_tx_desc(struct macb *bp, unsigned int index)
+static struct macb_dma_desc *macb_tx_desc(struct macb_queue *queue,
+					  unsigned int index)
 {
-	return &bp->tx_ring[macb_tx_ring_wrap(index)];
+	return &queue->tx_ring[macb_tx_ring_wrap(index)];
 }
 
-static struct macb_tx_skb *macb_tx_skb(struct macb *bp, unsigned int index)
+static struct macb_tx_skb *macb_tx_skb(struct macb_queue *queue,
+				       unsigned int index)
 {
-	return &bp->tx_skb[macb_tx_ring_wrap(index)];
+	return &queue->tx_skb[macb_tx_ring_wrap(index)];
 }
 
-static dma_addr_t macb_tx_dma(struct macb *bp, unsigned int index)
+static dma_addr_t macb_tx_dma(struct macb_queue *queue, unsigned int index)
 {
 	dma_addr_t offset;
 
 	offset = macb_tx_ring_wrap(index) * sizeof(struct macb_dma_desc);
 
-	return bp->tx_ring_dma + offset;
+	return queue->tx_ring_dma + offset;
 }
 
 static unsigned int macb_rx_ring_wrap(unsigned int index)
@@ -490,38 +492,49 @@
 
 static void macb_tx_error_task(struct work_struct *work)
 {
-	struct macb	*bp = container_of(work, struct macb, tx_error_task);
+	struct macb_queue	*queue = container_of(work, struct macb_queue,
+						      tx_error_task);
+	struct macb		*bp = queue->bp;
 	struct macb_tx_skb	*tx_skb;
+	struct macb_dma_desc	*desc;
 	struct sk_buff		*skb;
 	unsigned int		tail;
+	unsigned long		flags;
 
-	netdev_vdbg(bp->dev, "macb_tx_error_task: t = %u, h = %u\n",
-		    bp->tx_tail, bp->tx_head);
+	netdev_vdbg(bp->dev, "macb_tx_error_task: q = %u, t = %u, h = %u\n",
+		    (unsigned int)(queue - bp->queues),
+		    queue->tx_tail, queue->tx_head);
+
+	/* Prevent the queue IRQ handlers from running: each of them may call
+	 * macb_tx_interrupt(), which in turn may call netif_wake_subqueue().
+	 * As explained below, we have to halt the transmission before updating
+	 * TBQP registers so we call netif_tx_stop_all_queues() to notify the
+	 * network engine about the macb/gem being halted.
+	 */
+	spin_lock_irqsave(&bp->lock, flags);
 
 	/* Make sure nobody is trying to queue up new packets */
-	netif_stop_queue(bp->dev);
+	netif_tx_stop_all_queues(bp->dev);
 
 	/*
 	 * Stop transmission now
 	 * (in case we have just queued new packets)
+	 * macb/gem must be halted to write TBQP register
 	 */
 	if (macb_halt_tx(bp))
 		/* Just complain for now, reinitializing TX path can be good */
 		netdev_err(bp->dev, "BUG: halt tx timed out\n");
 
-	/* No need for the lock here as nobody will interrupt us anymore */
-
 	/*
 	 * Treat frames in TX queue including the ones that caused the error.
 	 * Free transmit buffers in upper layer.
 	 */
-	for (tail = bp->tx_tail; tail != bp->tx_head; tail++) {
-		struct macb_dma_desc	*desc;
-		u32			ctrl;
+	for (tail = queue->tx_tail; tail != queue->tx_head; tail++) {
+		u32	ctrl;
 
-		desc = macb_tx_desc(bp, tail);
+		desc = macb_tx_desc(queue, tail);
 		ctrl = desc->ctrl;
-		tx_skb = macb_tx_skb(bp, tail);
+		tx_skb = macb_tx_skb(queue, tail);
 		skb = tx_skb->skb;
 
 		if (ctrl & MACB_BIT(TX_USED)) {
@@ -529,7 +542,7 @@
 			while (!skb) {
 				macb_tx_unmap(bp, tx_skb);
 				tail++;
-				tx_skb = macb_tx_skb(bp, tail);
+				tx_skb = macb_tx_skb(queue, tail);
 				skb = tx_skb->skb;
 			}
 
@@ -558,45 +571,56 @@
 		macb_tx_unmap(bp, tx_skb);
 	}
 
+	/* Set end of TX queue */
+	desc = macb_tx_desc(queue, 0);
+	desc->addr = 0;
+	desc->ctrl = MACB_BIT(TX_USED);
+
 	/* Make descriptor updates visible to hardware */
 	wmb();
 
 	/* Reinitialize the TX desc queue */
-	macb_writel(bp, TBQP, bp->tx_ring_dma);
+	queue_writel(queue, TBQP, queue->tx_ring_dma);
 	/* Make TX ring reflect state of hardware */
-	bp->tx_head = bp->tx_tail = 0;
-
-	/* Now we are ready to start transmission again */
-	netif_wake_queue(bp->dev);
+	queue->tx_head = 0;
+	queue->tx_tail = 0;
 
 	/* Housework before enabling TX IRQ */
 	macb_writel(bp, TSR, macb_readl(bp, TSR));
-	macb_writel(bp, IER, MACB_TX_INT_FLAGS);
+	queue_writel(queue, IER, MACB_TX_INT_FLAGS);
+
+	/* Now we are ready to start transmission again */
+	netif_tx_start_all_queues(bp->dev);
+	macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
+
+	spin_unlock_irqrestore(&bp->lock, flags);
 }
 
-static void macb_tx_interrupt(struct macb *bp)
+static void macb_tx_interrupt(struct macb_queue *queue)
 {
 	unsigned int tail;
 	unsigned int head;
 	u32 status;
+	struct macb *bp = queue->bp;
+	u16 queue_index = queue - bp->queues;
 
 	status = macb_readl(bp, TSR);
 	macb_writel(bp, TSR, status);
 
 	if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
-		macb_writel(bp, ISR, MACB_BIT(TCOMP));
+		queue_writel(queue, ISR, MACB_BIT(TCOMP));
 
 	netdev_vdbg(bp->dev, "macb_tx_interrupt status = 0x%03lx\n",
 		(unsigned long)status);
 
-	head = bp->tx_head;
-	for (tail = bp->tx_tail; tail != head; tail++) {
+	head = queue->tx_head;
+	for (tail = queue->tx_tail; tail != head; tail++) {
 		struct macb_tx_skb	*tx_skb;
 		struct sk_buff		*skb;
 		struct macb_dma_desc	*desc;
 		u32			ctrl;
 
-		desc = macb_tx_desc(bp, tail);
+		desc = macb_tx_desc(queue, tail);
 
 		/* Make hw descriptor updates visible to CPU */
 		rmb();
@@ -611,7 +635,7 @@
 
 		/* Process all buffers of the current transmitted frame */
 		for (;; tail++) {
-			tx_skb = macb_tx_skb(bp, tail);
+			tx_skb = macb_tx_skb(queue, tail);
 			skb = tx_skb->skb;
 
 			/* First, update TX stats if needed */
@@ -634,11 +658,11 @@
 		}
 	}
 
-	bp->tx_tail = tail;
-	if (netif_queue_stopped(bp->dev)
-			&& CIRC_CNT(bp->tx_head, bp->tx_tail,
-				    TX_RING_SIZE) <= MACB_TX_WAKEUP_THRESH)
-		netif_wake_queue(bp->dev);
+	queue->tx_tail = tail;
+	if (__netif_subqueue_stopped(bp->dev, queue_index) &&
+	    CIRC_CNT(queue->tx_head, queue->tx_tail,
+		     TX_RING_SIZE) <= MACB_TX_WAKEUP_THRESH)
+		netif_wake_subqueue(bp->dev, queue_index);
 }
 
 static void gem_rx_refill(struct macb *bp)
@@ -949,11 +973,12 @@
 
 static irqreturn_t macb_interrupt(int irq, void *dev_id)
 {
-	struct net_device *dev = dev_id;
-	struct macb *bp = netdev_priv(dev);
+	struct macb_queue *queue = dev_id;
+	struct macb *bp = queue->bp;
+	struct net_device *dev = bp->dev;
 	u32 status;
 
-	status = macb_readl(bp, ISR);
+	status = queue_readl(queue, ISR);
 
 	if (unlikely(!status))
 		return IRQ_NONE;
@@ -963,11 +988,13 @@
 	while (status) {
 		/* close possible race with dev_close */
 		if (unlikely(!netif_running(dev))) {
-			macb_writel(bp, IDR, -1);
+			queue_writel(queue, IDR, -1);
 			break;
 		}
 
-		netdev_vdbg(bp->dev, "isr = 0x%08lx\n", (unsigned long)status);
+		netdev_vdbg(bp->dev, "queue = %u, isr = 0x%08lx\n",
+			    (unsigned int)(queue - bp->queues),
+			    (unsigned long)status);
 
 		if (status & MACB_RX_INT_FLAGS) {
 			/*
@@ -977,9 +1004,9 @@
 			 * is already scheduled, so disable interrupts
 			 * now.
 			 */
-			macb_writel(bp, IDR, MACB_RX_INT_FLAGS);
+			queue_writel(queue, IDR, MACB_RX_INT_FLAGS);
 			if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
-				macb_writel(bp, ISR, MACB_BIT(RCOMP));
+				queue_writel(queue, ISR, MACB_BIT(RCOMP));
 
 			if (napi_schedule_prep(&bp->napi)) {
 				netdev_vdbg(bp->dev, "scheduling RX softirq\n");
@@ -988,17 +1015,17 @@
 		}
 
 		if (unlikely(status & (MACB_TX_ERR_FLAGS))) {
-			macb_writel(bp, IDR, MACB_TX_INT_FLAGS);
-			schedule_work(&bp->tx_error_task);
+			queue_writel(queue, IDR, MACB_TX_INT_FLAGS);
+			schedule_work(&queue->tx_error_task);
 
 			if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
-				macb_writel(bp, ISR, MACB_TX_ERR_FLAGS);
+				queue_writel(queue, ISR, MACB_TX_ERR_FLAGS);
 
 			break;
 		}
 
 		if (status & MACB_BIT(TCOMP))
-			macb_tx_interrupt(bp);
+			macb_tx_interrupt(queue);
 
 		/*
 		 * Link change detection isn't possible with RMII, so we'll
@@ -1013,7 +1040,7 @@
 				bp->hw_stats.macb.rx_overruns++;
 
 			if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
-				macb_writel(bp, ISR, MACB_BIT(ISR_ROVR));
+				queue_writel(queue, ISR, MACB_BIT(ISR_ROVR));
 		}
 
 		if (status & MACB_BIT(HRESP)) {
@@ -1025,10 +1052,10 @@
 			netdev_err(dev, "DMA bus error: HRESP not OK\n");
 
 			if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
-				macb_writel(bp, ISR, MACB_BIT(HRESP));
+				queue_writel(queue, ISR, MACB_BIT(HRESP));
 		}
 
-		status = macb_readl(bp, ISR);
+		status = queue_readl(queue, ISR);
 	}
 
 	spin_unlock(&bp->lock);
@@ -1043,10 +1070,14 @@
  */
 static void macb_poll_controller(struct net_device *dev)
 {
+	struct macb *bp = netdev_priv(dev);
+	struct macb_queue *queue;
 	unsigned long flags;
+	unsigned int q;
 
 	local_irq_save(flags);
-	macb_interrupt(dev->irq, dev);
+	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue)
+		macb_interrupt(dev->irq, queue);
 	local_irq_restore(flags);
 }
 #endif
@@ -1058,10 +1089,11 @@
 }
 
 static unsigned int macb_tx_map(struct macb *bp,
+				struct macb_queue *queue,
 				struct sk_buff *skb)
 {
 	dma_addr_t mapping;
-	unsigned int len, entry, i, tx_head = bp->tx_head;
+	unsigned int len, entry, i, tx_head = queue->tx_head;
 	struct macb_tx_skb *tx_skb = NULL;
 	struct macb_dma_desc *desc;
 	unsigned int offset, size, count = 0;
@@ -1075,7 +1107,7 @@
 	while (len) {
 		size = min(len, bp->max_tx_length);
 		entry = macb_tx_ring_wrap(tx_head);
-		tx_skb = &bp->tx_skb[entry];
+		tx_skb = &queue->tx_skb[entry];
 
 		mapping = dma_map_single(&bp->pdev->dev,
 					 skb->data + offset,
@@ -1104,7 +1136,7 @@
 		while (len) {
 			size = min(len, bp->max_tx_length);
 			entry = macb_tx_ring_wrap(tx_head);
-			tx_skb = &bp->tx_skb[entry];
+			tx_skb = &queue->tx_skb[entry];
 
 			mapping = skb_frag_dma_map(&bp->pdev->dev, frag,
 						   offset, size, DMA_TO_DEVICE);
@@ -1143,14 +1175,14 @@
 	i = tx_head;
 	entry = macb_tx_ring_wrap(i);
 	ctrl = MACB_BIT(TX_USED);
-	desc = &bp->tx_ring[entry];
+	desc = &queue->tx_ring[entry];
 	desc->ctrl = ctrl;
 
 	do {
 		i--;
 		entry = macb_tx_ring_wrap(i);
-		tx_skb = &bp->tx_skb[entry];
-		desc = &bp->tx_ring[entry];
+		tx_skb = &queue->tx_skb[entry];
+		desc = &queue->tx_ring[entry];
 
 		ctrl = (u32)tx_skb->size;
 		if (eof) {
@@ -1167,17 +1199,17 @@
 		 */
 		wmb();
 		desc->ctrl = ctrl;
-	} while (i != bp->tx_head);
+	} while (i != queue->tx_head);
 
-	bp->tx_head = tx_head;
+	queue->tx_head = tx_head;
 
 	return count;
 
 dma_error:
 	netdev_err(bp->dev, "TX DMA map failed\n");
 
-	for (i = bp->tx_head; i != tx_head; i++) {
-		tx_skb = macb_tx_skb(bp, i);
+	for (i = queue->tx_head; i != tx_head; i++) {
+		tx_skb = macb_tx_skb(queue, i);
 
 		macb_tx_unmap(bp, tx_skb);
 	}
@@ -1187,14 +1219,16 @@
 
 static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	u16 queue_index = skb_get_queue_mapping(skb);
 	struct macb *bp = netdev_priv(dev);
+	struct macb_queue *queue = &bp->queues[queue_index];
 	unsigned long flags;
 	unsigned int count, nr_frags, frag_size, f;
 
 #if defined(DEBUG) && defined(VERBOSE_DEBUG)
 	netdev_vdbg(bp->dev,
-		   "start_xmit: len %u head %p data %p tail %p end %p\n",
-		   skb->len, skb->head, skb->data,
+		   "start_xmit: queue %hu len %u head %p data %p tail %p end %p\n",
+		   queue_index, skb->len, skb->head, skb->data,
 		   skb_tail_pointer(skb), skb_end_pointer(skb));
 	print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_OFFSET, 16, 1,
 		       skb->data, 16, true);
@@ -1214,16 +1248,16 @@
 	spin_lock_irqsave(&bp->lock, flags);
 
 	/* This is a hard error, log it. */
-	if (CIRC_SPACE(bp->tx_head, bp->tx_tail, TX_RING_SIZE) < count) {
-		netif_stop_queue(dev);
+	if (CIRC_SPACE(queue->tx_head, queue->tx_tail, TX_RING_SIZE) < count) {
+		netif_stop_subqueue(dev, queue_index);
 		spin_unlock_irqrestore(&bp->lock, flags);
 		netdev_dbg(bp->dev, "tx_head = %u, tx_tail = %u\n",
-			   bp->tx_head, bp->tx_tail);
+			   queue->tx_head, queue->tx_tail);
 		return NETDEV_TX_BUSY;
 	}
 
 	/* Map socket buffer for DMA transfer */
-	if (!macb_tx_map(bp, skb)) {
+	if (!macb_tx_map(bp, queue, skb)) {
 		dev_kfree_skb_any(skb);
 		goto unlock;
 	}
@@ -1235,8 +1269,8 @@
 
 	macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
 
-	if (CIRC_SPACE(bp->tx_head, bp->tx_tail, TX_RING_SIZE) < 1)
-		netif_stop_queue(dev);
+	if (CIRC_SPACE(queue->tx_head, queue->tx_tail, TX_RING_SIZE) < 1)
+		netif_stop_subqueue(dev, queue_index);
 
 unlock:
 	spin_unlock_irqrestore(&bp->lock, flags);
@@ -1304,20 +1338,24 @@
 
 static void macb_free_consistent(struct macb *bp)
 {
-	if (bp->tx_skb) {
-		kfree(bp->tx_skb);
-		bp->tx_skb = NULL;
-	}
+	struct macb_queue *queue;
+	unsigned int q;
+
 	bp->macbgem_ops.mog_free_rx_buffers(bp);
 	if (bp->rx_ring) {
 		dma_free_coherent(&bp->pdev->dev, RX_RING_BYTES,
 				  bp->rx_ring, bp->rx_ring_dma);
 		bp->rx_ring = NULL;
 	}
-	if (bp->tx_ring) {
-		dma_free_coherent(&bp->pdev->dev, TX_RING_BYTES,
-				  bp->tx_ring, bp->tx_ring_dma);
-		bp->tx_ring = NULL;
+
+	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+		kfree(queue->tx_skb);
+		queue->tx_skb = NULL;
+		if (queue->tx_ring) {
+			dma_free_coherent(&bp->pdev->dev, TX_RING_BYTES,
+					  queue->tx_ring, queue->tx_ring_dma);
+			queue->tx_ring = NULL;
+		}
 	}
 }
 
@@ -1354,12 +1392,27 @@
 
 static int macb_alloc_consistent(struct macb *bp)
 {
+	struct macb_queue *queue;
+	unsigned int q;
 	int size;
 
-	size = TX_RING_SIZE * sizeof(struct macb_tx_skb);
-	bp->tx_skb = kmalloc(size, GFP_KERNEL);
-	if (!bp->tx_skb)
-		goto out_err;
+	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+		size = TX_RING_BYTES;
+		queue->tx_ring = dma_alloc_coherent(&bp->pdev->dev, size,
+						    &queue->tx_ring_dma,
+						    GFP_KERNEL);
+		if (!queue->tx_ring)
+			goto out_err;
+		netdev_dbg(bp->dev,
+			   "Allocated TX ring for queue %u of %d bytes at %08lx (mapped %p)\n",
+			   q, size, (unsigned long)queue->tx_ring_dma,
+			   queue->tx_ring);
+
+		size = TX_RING_SIZE * sizeof(struct macb_tx_skb);
+		queue->tx_skb = kmalloc(size, GFP_KERNEL);
+		if (!queue->tx_skb)
+			goto out_err;
+	}
 
 	size = RX_RING_BYTES;
 	bp->rx_ring = dma_alloc_coherent(&bp->pdev->dev, size,
@@ -1370,15 +1423,6 @@
 		   "Allocated RX ring of %d bytes at %08lx (mapped %p)\n",
 		   size, (unsigned long)bp->rx_ring_dma, bp->rx_ring);
 
-	size = TX_RING_BYTES;
-	bp->tx_ring = dma_alloc_coherent(&bp->pdev->dev, size,
-					 &bp->tx_ring_dma, GFP_KERNEL);
-	if (!bp->tx_ring)
-		goto out_err;
-	netdev_dbg(bp->dev,
-		   "Allocated TX ring of %d bytes at %08lx (mapped %p)\n",
-		   size, (unsigned long)bp->tx_ring_dma, bp->tx_ring);
-
 	if (bp->macbgem_ops.mog_alloc_rx_buffers(bp))
 		goto out_err;
 
@@ -1391,15 +1435,22 @@
 
 static void gem_init_rings(struct macb *bp)
 {
+	struct macb_queue *queue;
+	unsigned int q;
 	int i;
 
-	for (i = 0; i < TX_RING_SIZE; i++) {
-		bp->tx_ring[i].addr = 0;
-		bp->tx_ring[i].ctrl = MACB_BIT(TX_USED);
+	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+		for (i = 0; i < TX_RING_SIZE; i++) {
+			queue->tx_ring[i].addr = 0;
+			queue->tx_ring[i].ctrl = MACB_BIT(TX_USED);
+		}
+		queue->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP);
+		queue->tx_head = 0;
+		queue->tx_tail = 0;
 	}
-	bp->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP);
 
-	bp->rx_tail = bp->rx_prepared_head = bp->tx_head = bp->tx_tail = 0;
+	bp->rx_tail = 0;
+	bp->rx_prepared_head = 0;
 
 	gem_rx_refill(bp);
 }
@@ -1418,16 +1469,21 @@
 	bp->rx_ring[RX_RING_SIZE - 1].addr |= MACB_BIT(RX_WRAP);
 
 	for (i = 0; i < TX_RING_SIZE; i++) {
-		bp->tx_ring[i].addr = 0;
-		bp->tx_ring[i].ctrl = MACB_BIT(TX_USED);
+		bp->queues[0].tx_ring[i].addr = 0;
+		bp->queues[0].tx_ring[i].ctrl = MACB_BIT(TX_USED);
+		bp->queues[0].tx_head = 0;
+		bp->queues[0].tx_tail = 0;
 	}
-	bp->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP);
+	bp->queues[0].tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP);
 
-	bp->rx_tail = bp->tx_head = bp->tx_tail = 0;
+	bp->rx_tail = 0;
 }
 
 static void macb_reset_hw(struct macb *bp)
 {
+	struct macb_queue *queue;
+	unsigned int q;
+
 	/*
 	 * Disable RX and TX (XXX: Should we halt the transmission
 	 * more gracefully?)
@@ -1442,8 +1498,10 @@
 	macb_writel(bp, RSR, -1);
 
 	/* Disable all interrupts */
-	macb_writel(bp, IDR, -1);
-	macb_readl(bp, ISR);
+	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+		queue_writel(queue, IDR, -1);
+		queue_readl(queue, ISR);
+	}
 }
 
 static u32 gem_mdc_clk_div(struct macb *bp)
@@ -1540,6 +1598,9 @@
 
 static void macb_init_hw(struct macb *bp)
 {
+	struct macb_queue *queue;
+	unsigned int q;
+
 	u32 config;
 
 	macb_reset_hw(bp);
@@ -1565,16 +1626,18 @@
 
 	/* Initialize TX and RX buffers */
 	macb_writel(bp, RBQP, bp->rx_ring_dma);
-	macb_writel(bp, TBQP, bp->tx_ring_dma);
+	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+		queue_writel(queue, TBQP, queue->tx_ring_dma);
+
+		/* Enable interrupts */
+		queue_writel(queue, IER,
+			     MACB_RX_INT_FLAGS |
+			     MACB_TX_INT_FLAGS |
+			     MACB_BIT(HRESP));
+	}
 
 	/* Enable TX and RX */
 	macb_writel(bp, NCR, MACB_BIT(RE) | MACB_BIT(TE) | MACB_BIT(MPE));
-
-	/* Enable interrupts */
-	macb_writel(bp, IER, (MACB_RX_INT_FLAGS
-			      | MACB_TX_INT_FLAGS
-			      | MACB_BIT(HRESP)));
-
 }
 
 /*
@@ -1736,7 +1799,7 @@
 	/* schedule a link state check */
 	phy_start(bp->phy_dev);
 
-	netif_start_queue(dev);
+	netif_tx_start_all_queues(dev);
 
 	return 0;
 }
@@ -1746,7 +1809,7 @@
 	struct macb *bp = netdev_priv(dev);
 	unsigned long flags;
 
-	netif_stop_queue(dev);
+	netif_tx_stop_all_queues(dev);
 	napi_disable(&bp->napi);
 
 	if (bp->phy_dev)
@@ -1895,8 +1958,8 @@
 	regs->version = (macb_readl(bp, MID) & ((1 << MACB_REV_SIZE) - 1))
 			| MACB_GREGS_VERSION;
 
-	tail = macb_tx_ring_wrap(bp->tx_tail);
-	head = macb_tx_ring_wrap(bp->tx_head);
+	tail = macb_tx_ring_wrap(bp->queues[0].tx_tail);
+	head = macb_tx_ring_wrap(bp->queues[0].tx_head);
 
 	regs_buff[0]  = macb_readl(bp, NCR);
 	regs_buff[1]  = macb_or_gem_readl(bp, NCFGR);
@@ -1909,8 +1972,8 @@
 
 	regs_buff[8]  = tail;
 	regs_buff[9]  = head;
-	regs_buff[10] = macb_tx_dma(bp, tail);
-	regs_buff[11] = macb_tx_dma(bp, head);
+	regs_buff[10] = macb_tx_dma(&bp->queues[0], tail);
+	regs_buff[11] = macb_tx_dma(&bp->queues[0], head);
 
 	if (macb_is_gem(bp)) {
 		regs_buff[12] = gem_readl(bp, USRIO);
@@ -2061,16 +2124,44 @@
 	netdev_dbg(bp->dev, "Cadence caps 0x%08x\n", bp->caps);
 }
 
+static void macb_probe_queues(void __iomem *mem,
+			      unsigned int *queue_mask,
+			      unsigned int *num_queues)
+{
+	unsigned int hw_q;
+	u32 mid;
+
+	*queue_mask = 0x1;
+	*num_queues = 1;
+
+	/* is it macb or gem ? */
+	mid = __raw_readl(mem + MACB_MID);
+	if (MACB_BFEXT(IDNUM, mid) != 0x2)
+		return;
+
+	/* bit 0 is never set but queue 0 always exists */
+	*queue_mask = __raw_readl(mem + GEM_DCFG6) & 0xff;
+	*queue_mask |= 0x1;
+
+	for (hw_q = 1; hw_q < MACB_MAX_QUEUES; ++hw_q)
+		if (*queue_mask & (1 << hw_q))
+			(*num_queues)++;
+}
+
 static int __init macb_probe(struct platform_device *pdev)
 {
 	struct macb_platform_data *pdata;
 	struct resource *regs;
 	struct net_device *dev;
 	struct macb *bp;
+	struct macb_queue *queue;
 	struct phy_device *phydev;
 	u32 config;
 	int err = -ENXIO;
 	const char *mac;
+	void __iomem *mem;
+	unsigned int hw_q, queue_mask, q, num_queues, q_irq = 0;
+	struct clk *pclk, *hclk, *tx_clk;
 
 	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!regs) {
@@ -2078,72 +2169,112 @@
 		goto err_out;
 	}
 
-	err = -ENOMEM;
-	dev = alloc_etherdev(sizeof(*bp));
-	if (!dev)
+	pclk = devm_clk_get(&pdev->dev, "pclk");
+	if (IS_ERR(pclk)) {
+		err = PTR_ERR(pclk);
+		dev_err(&pdev->dev, "failed to get macb_clk (%u)\n", err);
 		goto err_out;
+	}
+
+	hclk = devm_clk_get(&pdev->dev, "hclk");
+	if (IS_ERR(hclk)) {
+		err = PTR_ERR(hclk);
+		dev_err(&pdev->dev, "failed to get hclk (%u)\n", err);
+		goto err_out;
+	}
+
+	tx_clk = devm_clk_get(&pdev->dev, "tx_clk");
+
+	err = clk_prepare_enable(pclk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err);
+		goto err_out;
+	}
+
+	err = clk_prepare_enable(hclk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable hclk (%u)\n", err);
+		goto err_out_disable_pclk;
+	}
+
+	if (!IS_ERR(tx_clk)) {
+		err = clk_prepare_enable(tx_clk);
+		if (err) {
+			dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n",
+				err);
+			goto err_out_disable_hclk;
+		}
+	}
+
+	err = -ENOMEM;
+	mem = devm_ioremap(&pdev->dev, regs->start, resource_size(regs));
+	if (!mem) {
+		dev_err(&pdev->dev, "failed to map registers, aborting.\n");
+		goto err_out_disable_clocks;
+	}
+
+	macb_probe_queues(mem, &queue_mask, &num_queues);
+	dev = alloc_etherdev_mq(sizeof(*bp), num_queues);
+	if (!dev)
+		goto err_out_disable_clocks;
 
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
 	bp = netdev_priv(dev);
 	bp->pdev = pdev;
 	bp->dev = dev;
+	bp->regs = mem;
+	bp->num_queues = num_queues;
+	bp->pclk = pclk;
+	bp->hclk = hclk;
+	bp->tx_clk = tx_clk;
 
 	spin_lock_init(&bp->lock);
-	INIT_WORK(&bp->tx_error_task, macb_tx_error_task);
 
-	bp->pclk = devm_clk_get(&pdev->dev, "pclk");
-	if (IS_ERR(bp->pclk)) {
-		err = PTR_ERR(bp->pclk);
-		dev_err(&pdev->dev, "failed to get macb_clk (%u)\n", err);
-		goto err_out_free_dev;
-	}
+	/* Set the queue register mapping once and for all: queue0 has a special
+	 * register mapping, but we don't want to test the queue index and then
+	 * compute the corresponding register offset at run time.
+	 */
+	for (hw_q = 0; hw_q < MACB_MAX_QUEUES; ++hw_q) {
+		if (!(queue_mask & (1 << hw_q)))
+			continue;
 
-	bp->hclk = devm_clk_get(&pdev->dev, "hclk");
-	if (IS_ERR(bp->hclk)) {
-		err = PTR_ERR(bp->hclk);
-		dev_err(&pdev->dev, "failed to get hclk (%u)\n", err);
-		goto err_out_free_dev;
-	}
-
-	bp->tx_clk = devm_clk_get(&pdev->dev, "tx_clk");
-
-	err = clk_prepare_enable(bp->pclk);
-	if (err) {
-		dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err);
-		goto err_out_free_dev;
-	}
-
-	err = clk_prepare_enable(bp->hclk);
-	if (err) {
-		dev_err(&pdev->dev, "failed to enable hclk (%u)\n", err);
-		goto err_out_disable_pclk;
-	}
-
-	if (!IS_ERR(bp->tx_clk)) {
-		err = clk_prepare_enable(bp->tx_clk);
-		if (err) {
-			dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n",
-					err);
-			goto err_out_disable_hclk;
+		queue = &bp->queues[q_irq];
+		queue->bp = bp;
+		if (hw_q) {
+			queue->ISR  = GEM_ISR(hw_q - 1);
+			queue->IER  = GEM_IER(hw_q - 1);
+			queue->IDR  = GEM_IDR(hw_q - 1);
+			queue->IMR  = GEM_IMR(hw_q - 1);
+			queue->TBQP = GEM_TBQP(hw_q - 1);
+		} else {
+			/* queue0 uses legacy registers */
+			queue->ISR  = MACB_ISR;
+			queue->IER  = MACB_IER;
+			queue->IDR  = MACB_IDR;
+			queue->IMR  = MACB_IMR;
+			queue->TBQP = MACB_TBQP;
 		}
-	}
 
-	bp->regs = devm_ioremap(&pdev->dev, regs->start, resource_size(regs));
-	if (!bp->regs) {
-		dev_err(&pdev->dev, "failed to map registers, aborting.\n");
-		err = -ENOMEM;
-		goto err_out_disable_clocks;
-	}
+		/* get irq: here we use the Linux queue index, not the hardware
+		 * queue index. The queue IRQ definitions in the device tree
+		 * must remove the optional gaps that could exist in the
+		 * hardware queue mask.
+		 */
+		queue->irq = platform_get_irq(pdev, q_irq);
+		err = devm_request_irq(&pdev->dev, queue->irq, macb_interrupt,
+				       0, dev->name, queue);
+		if (err) {
+			dev_err(&pdev->dev,
+				"Unable to request IRQ %d (error %d)\n",
+				queue->irq, err);
+			goto err_out_free_irq;
+		}
 
-	dev->irq = platform_get_irq(pdev, 0);
-	err = devm_request_irq(&pdev->dev, dev->irq, macb_interrupt, 0,
-			dev->name, dev);
-	if (err) {
-		dev_err(&pdev->dev, "Unable to request IRQ %d (error %d)\n",
-			dev->irq, err);
-		goto err_out_disable_clocks;
+		INIT_WORK(&queue->tx_error_task, macb_tx_error_task);
+		q_irq++;
 	}
+	dev->irq = bp->queues[0].irq;
 
 	dev->netdev_ops = &macb_netdev_ops;
 	netif_napi_add(dev, &bp->napi, macb_poll, 64);
@@ -2219,7 +2350,7 @@
 	err = register_netdev(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
-		goto err_out_disable_clocks;
+		goto err_out_free_irq;
 	}
 
 	err = macb_mii_init(bp);
@@ -2242,15 +2373,17 @@
 
 err_out_unregister_netdev:
 	unregister_netdev(dev);
-err_out_disable_clocks:
-	if (!IS_ERR(bp->tx_clk))
-		clk_disable_unprepare(bp->tx_clk);
-err_out_disable_hclk:
-	clk_disable_unprepare(bp->hclk);
-err_out_disable_pclk:
-	clk_disable_unprepare(bp->pclk);
-err_out_free_dev:
+err_out_free_irq:
+	for (q = 0, queue = bp->queues; q < q_irq; ++q, ++queue)
+		devm_free_irq(&pdev->dev, queue->irq, queue);
 	free_netdev(dev);
+err_out_disable_clocks:
+	if (!IS_ERR(tx_clk))
+		clk_disable_unprepare(tx_clk);
+err_out_disable_hclk:
+	clk_disable_unprepare(hclk);
+err_out_disable_pclk:
+	clk_disable_unprepare(pclk);
 err_out:
 	return err;
 }
@@ -2259,6 +2392,8 @@
 {
 	struct net_device *dev;
 	struct macb *bp;
+	struct macb_queue *queue;
+	unsigned int q;
 
 	dev = platform_get_drvdata(pdev);
 
@@ -2270,11 +2405,14 @@
 		kfree(bp->mii_bus->irq);
 		mdiobus_free(bp->mii_bus);
 		unregister_netdev(dev);
+		queue = bp->queues;
+		for (q = 0; q < bp->num_queues; ++q, ++queue)
+			devm_free_irq(&pdev->dev, queue->irq, queue);
+		free_netdev(dev);
 		if (!IS_ERR(bp->tx_clk))
 			clk_disable_unprepare(bp->tx_clk);
 		clk_disable_unprepare(bp->hclk);
 		clk_disable_unprepare(bp->pclk);
-		free_netdev(dev);
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 517c09d..084191b 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -12,6 +12,7 @@
 
 #define MACB_GREGS_NBR 16
 #define MACB_GREGS_VERSION 1
+#define MACB_MAX_QUEUES 8
 
 /* MACB register offsets */
 #define MACB_NCR				0x0000
@@ -89,6 +90,13 @@
 #define GEM_DCFG6				0x0294
 #define GEM_DCFG7				0x0298
 
+#define GEM_ISR(hw_q)				(0x0400 + ((hw_q) << 2))
+#define GEM_TBQP(hw_q)				(0x0440 + ((hw_q) << 2))
+#define GEM_RBQP(hw_q)				(0x0480 + ((hw_q) << 2))
+#define GEM_IER(hw_q)				(0x0600 + ((hw_q) << 2))
+#define GEM_IDR(hw_q)				(0x0620 + ((hw_q) << 2))
+#define GEM_IMR(hw_q)				(0x0640 + ((hw_q) << 2))
+
 /* Bitfields in NCR */
 #define MACB_LB_OFFSET				0
 #define MACB_LB_SIZE				1
@@ -376,6 +384,10 @@
 	__raw_readl((port)->regs + GEM_##reg)
 #define gem_writel(port, reg, value)			\
 	__raw_writel((value), (port)->regs + GEM_##reg)
+#define queue_readl(queue, reg)				\
+	__raw_readl((queue)->bp->regs + (queue)->reg)
+#define queue_writel(queue, reg, value)			\
+	__raw_writel((value), (queue)->bp->regs + (queue)->reg)
 
 /*
  * Conditional GEM/MACB macros.  These perform the operation to the correct
@@ -597,6 +609,23 @@
 	unsigned int		dma_burst_length;
 };
 
+struct macb_queue {
+	struct macb		*bp;
+	int			irq;
+
+	unsigned int		ISR;
+	unsigned int		IER;
+	unsigned int		IDR;
+	unsigned int		IMR;
+	unsigned int		TBQP;
+
+	unsigned int		tx_head, tx_tail;
+	struct macb_dma_desc	*tx_ring;
+	struct macb_tx_skb	*tx_skb;
+	dma_addr_t		tx_ring_dma;
+	struct work_struct	tx_error_task;
+};
+
 struct macb {
 	void __iomem		*regs;
 
@@ -607,9 +636,8 @@
 	void			*rx_buffers;
 	size_t			rx_buffer_size;
 
-	unsigned int		tx_head, tx_tail;
-	struct macb_dma_desc	*tx_ring;
-	struct macb_tx_skb	*tx_skb;
+	unsigned int		num_queues;
+	struct macb_queue	queues[MACB_MAX_QUEUES];
 
 	spinlock_t		lock;
 	struct platform_device	*pdev;
@@ -618,7 +646,6 @@
 	struct clk		*tx_clk;
 	struct net_device	*dev;
 	struct napi_struct	napi;
-	struct work_struct	tx_error_task;
 	struct net_device_stats	stats;
 	union {
 		struct macb_stats	macb;
@@ -626,7 +653,6 @@
 	}			hw_stats;
 
 	dma_addr_t		rx_ring_dma;
-	dma_addr_t		tx_ring_dma;
 	dma_addr_t		rx_buffers_dma;
 
 	struct macb_or_gem_ops	macbgem_ops;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index a18d33f..5ab5c31 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -392,7 +392,7 @@
 	s16    xact_addr_filt;        /* index of exact MAC address filter */
 	u16    rss_size;              /* size of VI's RSS table slice */
 	s8     mdio_addr;
-	u8     port_type;
+	enum fw_port_type port_type;
 	u8     mod_type;
 	u8     port_id;
 	u8     tx_chan;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 973dbb7..ccf3436 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -2325,7 +2325,7 @@
 	return t4_identify_port(adap, adap->fn, netdev2pinfo(dev)->viid, val);
 }
 
-static unsigned int from_fw_linkcaps(unsigned int type, unsigned int caps)
+static unsigned int from_fw_linkcaps(enum fw_port_type type, unsigned int caps)
 {
 	unsigned int v = 0;
 
@@ -2354,14 +2354,20 @@
 		     SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full |
 		     SUPPORTED_10000baseKX4_Full;
 	else if (type == FW_PORT_TYPE_FIBER_XFI ||
-		 type == FW_PORT_TYPE_FIBER_XAUI || type == FW_PORT_TYPE_SFP) {
+		 type == FW_PORT_TYPE_FIBER_XAUI ||
+		 type == FW_PORT_TYPE_SFP ||
+		 type == FW_PORT_TYPE_QSFP_10G ||
+		 type == FW_PORT_TYPE_QSA) {
 		v |= SUPPORTED_FIBRE;
 		if (caps & FW_PORT_CAP_SPEED_1G)
 			v |= SUPPORTED_1000baseT_Full;
 		if (caps & FW_PORT_CAP_SPEED_10G)
 			v |= SUPPORTED_10000baseT_Full;
-	} else if (type == FW_PORT_TYPE_BP40_BA)
+	} else if (type == FW_PORT_TYPE_BP40_BA ||
+		   type == FW_PORT_TYPE_QSFP) {
 		v |= SUPPORTED_40000baseSR4_Full;
+		v |= SUPPORTED_FIBRE;
+	}
 
 	if (caps & FW_PORT_CAP_ANEG)
 		v |= SUPPORTED_Autoneg;
@@ -2396,6 +2402,7 @@
 		cmd->port = PORT_FIBRE;
 	else if (p->port_type == FW_PORT_TYPE_SFP ||
 		 p->port_type == FW_PORT_TYPE_QSFP_10G ||
+		 p->port_type == FW_PORT_TYPE_QSA ||
 		 p->port_type == FW_PORT_TYPE_QSFP) {
 		if (p->mod_type == FW_PORT_MOD_TYPE_LR ||
 		    p->mod_type == FW_PORT_MOD_TYPE_SR ||
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index beaf80a..291b6f2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -560,6 +560,7 @@
 	FW_FLOWC_MNEM_RCVNXT,
 	FW_FLOWC_MNEM_SNDBUF,
 	FW_FLOWC_MNEM_MSS,
+	FW_FLOWC_MNEM_TXDATAPLEN_MAX,
 };
 
 struct fw_flowc_mnemval {
@@ -2470,6 +2471,7 @@
 	FW_PORT_TYPE_BP4_AP,
 	FW_PORT_TYPE_QSFP_10G,
 	FW_PORT_TYPE_QSFP,
+	FW_PORT_TYPE_QSA,
 	FW_PORT_TYPE_BP40_BA,
 
 	FW_PORT_TYPE_NONE = FW_PORT_CMD_PTYPE_M
diff --git a/drivers/net/ethernet/davicom/Kconfig b/drivers/net/ethernet/davicom/Kconfig
index 316c5e5..7ec2d74 100644
--- a/drivers/net/ethernet/davicom/Kconfig
+++ b/drivers/net/ethernet/davicom/Kconfig
@@ -4,7 +4,7 @@
 
 config DM9000
 	tristate "DM9000 support"
-	depends on ARM || BLACKFIN || MIPS || COLDFIRE
+	depends on ARM || BLACKFIN || MIPS || COLDFIRE || NIOS2
 	select CRC32
 	select MII
 	---help---
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index ee1ecb1..eb088b1 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -615,14 +615,14 @@
 
 		rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean);
 
-		if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_DD))
+		if (!rx_desc->d.staterr)
 			break;
 
 		/* This memory barrier is needed to keep us from reading
 		 * any other fields out of the rx_desc until we know the
-		 * RXD_STATUS_DD bit is set
+		 * descriptor has been written back
 		 */
-		rmb();
+		dma_rmb();
 
 		/* retrieve a buffer from the ring */
 		skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 2e526d4..ff59897 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6910,14 +6910,14 @@
 
 		rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean);
 
-		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD))
+		if (!rx_desc->wb.upper.status_error)
 			break;
 
 		/* This memory barrier is needed to keep us from reading
 		 * any other fields out of the rx_desc until we know the
-		 * RXD_STAT_DD bit is set
+		 * descriptor has been written back
 		 */
-		rmb();
+		dma_rmb();
 
 		/* retrieve a buffer from the ring */
 		skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 798b055..2ed2c7d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2009,15 +2009,14 @@
 
 		rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
 
-		if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD))
+		if (!rx_desc->wb.upper.status_error)
 			break;
 
-		/*
-		 * This memory barrier is needed to keep us from reading
+		/* This memory barrier is needed to keep us from reading
 		 * any other fields out of the rx_desc until we know the
-		 * RXD_STAT_DD bit is set
+		 * descriptor has been written back
 		 */
-		rmb();
+		dma_rmb();
 
 		/* retrieve a buffer from the ring */
 		skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc);
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 4a1be34..44ce7d8 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -1364,8 +1364,8 @@
 	jme_free_rx_resources(jme);
 out_enable_tasklet:
 	tasklet_enable(&jme->txclean_task);
-	tasklet_hi_enable(&jme->rxclean_task);
-	tasklet_hi_enable(&jme->rxempty_task);
+	tasklet_enable(&jme->rxclean_task);
+	tasklet_enable(&jme->rxempty_task);
 out:
 	atomic_inc(&jme->link_changing);
 }
@@ -2408,8 +2408,8 @@
 	if (test_bit(JME_FLAG_POLL, &jme->flags)) {
 		JME_NAPI_ENABLE(jme);
 	} else {
-		tasklet_hi_enable(&jme->rxclean_task);
-		tasklet_hi_enable(&jme->rxempty_task);
+		tasklet_enable(&jme->rxclean_task);
+		tasklet_enable(&jme->rxempty_task);
 	}
 	dpi->cur		= PCC_P1;
 	dpi->attempt		= PCC_P1;
@@ -3290,8 +3290,8 @@
 	}
 
 	tasklet_enable(&jme->txclean_task);
-	tasklet_hi_enable(&jme->rxclean_task);
-	tasklet_hi_enable(&jme->rxempty_task);
+	tasklet_enable(&jme->rxclean_task);
+	tasklet_enable(&jme->rxempty_task);
 
 	jme_powersave_phy(jme);
 
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 3dad7e8..14a1c5c 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -5919,7 +5919,7 @@
 	rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0x0000, ERIAR_EXGMAC);
 }
 
-static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
+static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 {
 	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
@@ -5954,6 +5954,24 @@
 	rtl_pcie_state_l2l3_enable(tp, false);
 }
 
+static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
+{
+	void __iomem *ioaddr = tp->mmio_addr;
+	static const struct ephy_info e_info_8168g_1[] = {
+		{ 0x00, 0x0000,	0x0008 },
+		{ 0x0c, 0x37d0,	0x0820 },
+		{ 0x1e, 0x0000,	0x0001 },
+		{ 0x19, 0x8000,	0x0000 }
+	};
+
+	rtl_hw_start_8168g(tp);
+
+	/* disable aspm and clock request before access ephy */
+	RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
+	RTL_W8(Config5, RTL_R8(Config5) & ~ASPM_en);
+	rtl_ephy_init(tp, e_info_8168g_1, ARRAY_SIZE(e_info_8168g_1));
+}
+
 static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
 {
 	void __iomem *ioaddr = tp->mmio_addr;
@@ -5964,7 +5982,7 @@
 		{ 0x1e, 0xffff,	0x20eb }
 	};
 
-	rtl_hw_start_8168g_1(tp);
+	rtl_hw_start_8168g(tp);
 
 	/* disable aspm and clock request before access ephy */
 	RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
@@ -5983,7 +6001,7 @@
 		{ 0x1e, 0x0000,	0x2000 }
 	};
 
-	rtl_hw_start_8168g_1(tp);
+	rtl_hw_start_8168g(tp);
 
 	/* disable aspm and clock request before access ephy */
 	RTL_W8(Config2, RTL_R8(Config2) & ~ClkReqEn);
@@ -6605,6 +6623,9 @@
 {
 	u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
 
+	/* Force memory writes to complete before releasing descriptor */
+	dma_wmb();
+
 	desc->opts1 = cpu_to_le32(DescOwn | eor | rx_buf_sz);
 }
 
@@ -6612,7 +6633,6 @@
 				       u32 rx_buf_sz)
 {
 	desc->addr = cpu_to_le64(mapping);
-	wmb();
 	rtl8169_mark_to_asic(desc, rx_buf_sz);
 }
 
@@ -7073,16 +7093,18 @@
 
 	skb_tx_timestamp(skb);
 
-	wmb();
+	/* Force memory writes to complete before releasing descriptor */
+	dma_wmb();
 
 	/* Anti gcc 2.95.3 bugware (sic) */
 	status = opts[0] | len | (RingEnd * !((entry + 1) % NUM_TX_DESC));
 	txd->opts1 = cpu_to_le32(status);
 
-	tp->cur_tx += frags + 1;
-
+	/* Force all memory writes to complete before notifying device */
 	wmb();
 
+	tp->cur_tx += frags + 1;
+
 	RTL_W8(TxPoll, NPQ);
 
 	mmiowb();
@@ -7181,11 +7203,16 @@
 		struct ring_info *tx_skb = tp->tx_skb + entry;
 		u32 status;
 
-		rmb();
 		status = le32_to_cpu(tp->TxDescArray[entry].opts1);
 		if (status & DescOwn)
 			break;
 
+		/* This barrier is needed to keep us from reading
+		 * any other fields out of the Tx descriptor until
+		 * we know the status of DescOwn
+		 */
+		dma_rmb();
+
 		rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb,
 				     tp->TxDescArray + entry);
 		if (status & LastFrag) {
@@ -7280,11 +7307,16 @@
 		struct RxDesc *desc = tp->RxDescArray + entry;
 		u32 status;
 
-		rmb();
 		status = le32_to_cpu(desc->opts1) & tp->opts1_mask;
-
 		if (status & DescOwn)
 			break;
+
+		/* This barrier is needed to keep us from reading
+		 * any other fields out of the Rx descriptor until
+		 * we know the status of DescOwn
+		 */
+		dma_rmb();
+
 		if (unlikely(status & RxRES)) {
 			netif_info(tp, rx_err, dev, "Rx ERROR. status = %08x\n",
 				   status);
@@ -7346,7 +7378,6 @@
 		}
 release_descriptor:
 		desc->opts2 = 0;
-		wmb();
 		rtl8169_mark_to_asic(desc, rx_buf_sz);
 	}
 
diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig
index 753630f..6279268 100644
--- a/drivers/net/ethernet/smsc/Kconfig
+++ b/drivers/net/ethernet/smsc/Kconfig
@@ -6,7 +6,7 @@
 	bool "SMC (SMSC)/Western Digital devices"
 	default y
 	depends on ARM || ISA || MAC || ARM64 || MIPS || M32R || SUPERH || \
-		BLACKFIN || MN10300 || COLDFIRE || XTENSA || PCI || PCMCIA
+		BLACKFIN || MN10300 || COLDFIRE || XTENSA || NIOS2 || PCI || PCMCIA
 	---help---
 	  If you have a network (Ethernet) card belonging to this class, say Y
 	  and read the Ethernet-HOWTO, available from
@@ -39,7 +39,7 @@
 	select CRC32
 	select MII
 	depends on (ARM || M32R || SUPERH || MIPS || BLACKFIN || \
-		    MN10300 || COLDFIRE || ARM64 || XTENSA)
+		    MN10300 || COLDFIRE || ARM64 || XTENSA || NIOS2)
 	---help---
 	  This is a driver for SMC's 91x series of Ethernet chipsets,
 	  including the SMC91C94 and the SMC91C111. Say Y if you want it
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index 90c86cd..45c408e 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -466,23 +466,6 @@
 	return err;
 }
 
-static u32 next_idx(u32 idx, struct vio_dring_state *dr)
-{
-	if (++idx == dr->num_entries)
-		idx = 0;
-	return idx;
-}
-
-static u32 prev_idx(u32 idx, struct vio_dring_state *dr)
-{
-	if (idx == 0)
-		idx = dr->num_entries - 1;
-	else
-		idx--;
-
-	return idx;
-}
-
 static struct vio_net_desc *get_rx_desc(struct vnet_port *port,
 					struct vio_dring_state *dr,
 					u32 index)
@@ -556,7 +539,8 @@
 	int ack_start = -1, ack_end = -1;
 	bool send_ack = true;
 
-	end = (end == (u32) -1) ? prev_idx(start, dr) : next_idx(end, dr);
+	end = (end == (u32) -1) ? vio_dring_prev(dr, start)
+				: vio_dring_next(dr, end);
 
 	viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end);
 
@@ -570,7 +554,7 @@
 		if (ack_start == -1)
 			ack_start = start;
 		ack_end = start;
-		start = next_idx(start, dr);
+		start = vio_dring_next(dr, start);
 		if (ack && start != end) {
 			err = vnet_send_ack(port, dr, ack_start, ack_end,
 					    VIO_DRING_ACTIVE);
@@ -584,7 +568,7 @@
 		}
 	}
 	if (unlikely(ack_start == -1))
-		ack_start = ack_end = prev_idx(start, dr);
+		ack_start = ack_end = vio_dring_prev(dr, start);
 	if (send_ack) {
 		port->napi_resume = false;
 		return vnet_send_ack(port, dr, ack_start, ack_end,
@@ -633,7 +617,7 @@
 			found = 1;
 			break;
 		}
-		idx = next_idx(idx, dr);
+		idx = vio_dring_next(dr, idx);
 	}
 	return found;
 }
@@ -663,7 +647,7 @@
 	/* sync for race conditions with vnet_start_xmit() and tell xmit it
 	 * is time to send a trigger.
 	 */
-	dr->cons = next_idx(end, dr);
+	dr->cons = vio_dring_next(dr, end);
 	desc = vio_dring_entry(dr, dr->cons);
 	if (desc->hdr.state == VIO_DESC_READY && !port->start_cons) {
 		/* vnet_start_xmit() just populated this dring but missed
@@ -784,7 +768,7 @@
 			pkt->tag.stype = VIO_SUBTYPE_INFO;
 			pkt->tag.stype_env = VIO_DRING_DATA;
 			pkt->seq = dr->rcv_nxt;
-			pkt->start_idx = next_idx(port->napi_stop_idx, dr);
+			pkt->start_idx = vio_dring_next(dr, port->napi_stop_idx);
 			pkt->end_idx = -1;
 			goto napi_resume;
 		}
diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed.c
index 47872caa..3ad0e6e 100644
--- a/drivers/net/phy/fixed.c
+++ b/drivers/net/phy/fixed.c
@@ -274,6 +274,7 @@
 
 	return phy;
 }
+EXPORT_SYMBOL_GPL(fixed_phy_register);
 
 static int __init fixed_mdio_bus_init(void)
 {
diff --git a/drivers/pcmcia/sa1100_generic.c b/drivers/pcmcia/sa1100_generic.c
index ff8a027..d2ab060 100644
--- a/drivers/pcmcia/sa1100_generic.c
+++ b/drivers/pcmcia/sa1100_generic.c
@@ -93,6 +93,7 @@
 	for (i = 0; i < sinfo->nskt; i++)
 		soc_pcmcia_remove_one(&sinfo->skt[i]);
 
+	clk_put(sinfo->clk);
 	kfree(sinfo);
 	return 0;
 }
diff --git a/drivers/pcmcia/sa1111_generic.c b/drivers/pcmcia/sa1111_generic.c
index 65b02c3..7bae7e5 100644
--- a/drivers/pcmcia/sa1111_generic.c
+++ b/drivers/pcmcia/sa1111_generic.c
@@ -145,6 +145,12 @@
 			return -ENOMEM;
 
 		s->soc.nr = ops->first + i;
+		s->soc.clk = clk_get(&dev->dev, NULL);
+		if (IS_ERR(s->soc.clk)) {
+			ret = PTR_ERR(s->soc.clk);
+			kfree(s);
+			return ret;
+		}
 		soc_pcmcia_init_one(&s->soc, ops, &dev->dev);
 		s->dev = dev;
 		if (s->soc.nr) {
@@ -220,6 +226,7 @@
 	for (; s; s = next) {
 		next = s->next;
 		soc_pcmcia_remove_one(&s->soc);
+		clk_put(s->soc.clk);
 		kfree(s);
 	}
 
diff --git a/drivers/pcmcia/sa11xx_base.c b/drivers/pcmcia/sa11xx_base.c
index 54d3089..cf6de2c 100644
--- a/drivers/pcmcia/sa11xx_base.c
+++ b/drivers/pcmcia/sa11xx_base.c
@@ -135,14 +135,16 @@
 static int
 sa1100_pcmcia_set_timing(struct soc_pcmcia_socket *skt)
 {
-	return sa1100_pcmcia_set_mecr(skt, cpufreq_get(0));
+	unsigned long clk = clk_get_rate(skt->clk);
+
+	return sa1100_pcmcia_set_mecr(skt, clk / 1000);
 }
 
 static int
 sa1100_pcmcia_show_timing(struct soc_pcmcia_socket *skt, char *buf)
 {
 	struct soc_pcmcia_timing timing;
-	unsigned int clock = cpufreq_get(0);
+	unsigned int clock = clk_get_rate(skt->clk);
 	unsigned long mecr = MECR;
 	char *p = buf;
 
@@ -218,6 +220,11 @@
 	struct skt_dev_info *sinfo;
 	struct soc_pcmcia_socket *skt;
 	int i, ret = 0;
+	struct clk *clk;
+
+	clk = clk_get(dev, NULL);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
 
 	sa11xx_drv_pcmcia_ops(ops);
 
@@ -226,12 +233,14 @@
 		return -ENOMEM;
 
 	sinfo->nskt = nr;
+	sinfo->clk = clk;
 
 	/* Initialize processor specific parameters */
 	for (i = 0; i < nr; i++) {
 		skt = &sinfo->skt[i];
 
 		skt->nr = first + i;
+		skt->clk = clk;
 		soc_pcmcia_init_one(skt, ops, dev);
 
 		ret = sa11xx_drv_pcmcia_add_one(skt);
@@ -242,6 +251,7 @@
 	if (ret) {
 		while (--i >= 0)
 			soc_pcmcia_remove_one(&sinfo->skt[i]);
+		clk_put(clk);
 		kfree(sinfo);
 	} else {
 		dev_set_drvdata(dev, sinfo);
diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c
index a2bc6ee..933f465 100644
--- a/drivers/pcmcia/soc_common.c
+++ b/drivers/pcmcia/soc_common.c
@@ -120,6 +120,8 @@
 
 	if (skt->ops->hw_shutdown)
 		skt->ops->hw_shutdown(skt);
+
+	clk_disable_unprepare(skt->clk);
 }
 
 static void soc_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt)
@@ -131,6 +133,8 @@
 {
 	int ret = 0, i;
 
+	clk_prepare_enable(skt->clk);
+
 	if (skt->ops->hw_init) {
 		ret = skt->ops->hw_init(skt);
 		if (ret)
diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
index 69fbfc8..a83d2ce 100644
--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
+++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
@@ -75,6 +75,7 @@
 static void *t4_uld_add(const struct cxgb4_lld_info *);
 static int t4_uld_rx_handler(void *, const __be64 *, const struct pkt_gl *);
 static int t4_uld_state_change(void *, enum cxgb4_state state);
+static inline int send_tx_flowc_wr(struct cxgbi_sock *);
 
 static const struct cxgb4_uld_info cxgb4i_uld_info = {
 	.name = DRV_MODULE_NAME,
@@ -157,12 +158,6 @@
 #define RCV_BUFSIZ_MASK		0x3FFU
 #define MAX_IMM_TX_PKT_LEN	128
 
-static inline void set_queue(struct sk_buff *skb, unsigned int queue,
-				const struct cxgbi_sock *csk)
-{
-	skb->queue_mapping = queue;
-}
-
 static int push_tx_frames(struct cxgbi_sock *, int);
 
 /*
@@ -172,10 +167,14 @@
  * Returns true if a packet can be sent as an offload WR with immediate
  * data.  We currently use the same limit as for Ethernet packets.
  */
-static inline int is_ofld_imm(const struct sk_buff *skb)
+static inline bool is_ofld_imm(const struct sk_buff *skb)
 {
-	return skb->len <= (MAX_IMM_TX_PKT_LEN -
-			sizeof(struct fw_ofld_tx_data_wr));
+	int len = skb->len;
+
+	if (likely(cxgbi_skcb_test_flag(skb, SKCBF_TX_NEED_HDR)))
+		len += sizeof(struct fw_ofld_tx_data_wr);
+
+	return len <= MAX_IMM_TX_PKT_LEN;
 }
 
 static void send_act_open_req(struct cxgbi_sock *csk, struct sk_buff *skb,
@@ -388,13 +387,19 @@
 
 	if (unlikely(csk->state == CTP_ABORTING) || !skb || !csk->cdev)
 		return;
+
+	if (!cxgbi_sock_flag(csk, CTPF_TX_DATA_SENT)) {
+		send_tx_flowc_wr(csk);
+		cxgbi_sock_set_flag(csk, CTPF_TX_DATA_SENT);
+	}
+
 	cxgbi_sock_set_state(csk, CTP_ABORTING);
 	cxgbi_sock_set_flag(csk, CTPF_ABORT_RPL_PENDING);
 	cxgbi_sock_purge_write_queue(csk);
 
 	csk->cpl_abort_req = NULL;
 	req = (struct cpl_abort_req *)skb->head;
-	set_queue(skb, CPL_PRIORITY_DATA, csk);
+	set_wr_txq(skb, CPL_PRIORITY_DATA, csk->port_id);
 	req->cmd = CPL_ABORT_SEND_RST;
 	t4_set_arp_err_handler(skb, csk, abort_arp_failure);
 	INIT_TP_WR(req, csk->tid);
@@ -420,7 +425,7 @@
 		csk, csk->state, csk->flags, csk->tid, rst_status);
 
 	csk->cpl_abort_rpl = NULL;
-	set_queue(skb, CPL_PRIORITY_DATA, csk);
+	set_wr_txq(skb, CPL_PRIORITY_DATA, csk->port_id);
 	INIT_TP_WR(rpl, csk->tid);
 	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, csk->tid));
 	rpl->cmd = rst_status;
@@ -491,20 +496,40 @@
 	return flits + sgl_len(cnt);
 }
 
-static inline void send_tx_flowc_wr(struct cxgbi_sock *csk)
+#define FLOWC_WR_NPARAMS_MIN	9
+static inline int tx_flowc_wr_credits(int *nparamsp, int *flowclenp)
+{
+	int nparams, flowclen16, flowclen;
+
+	nparams = FLOWC_WR_NPARAMS_MIN;
+	flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
+	flowclen16 = DIV_ROUND_UP(flowclen, 16);
+	flowclen = flowclen16 * 16;
+	/*
+	 * Return the number of 16-byte credits used by the FlowC request.
+	 * Pass back the nparams and actual FlowC length if requested.
+	 */
+	if (nparamsp)
+		*nparamsp = nparams;
+	if (flowclenp)
+		*flowclenp = flowclen;
+
+	return flowclen16;
+}
+
+static inline int send_tx_flowc_wr(struct cxgbi_sock *csk)
 {
 	struct sk_buff *skb;
 	struct fw_flowc_wr *flowc;
-	int flowclen, i;
+	int nparams, flowclen16, flowclen;
 
-	flowclen = 80;
+	flowclen16 = tx_flowc_wr_credits(&nparams, &flowclen);
 	skb = alloc_wr(flowclen, 0, GFP_ATOMIC);
 	flowc = (struct fw_flowc_wr *)skb->head;
 	flowc->op_to_nparams =
-		htonl(FW_WR_OP_V(FW_FLOWC_WR) | FW_FLOWC_WR_NPARAMS_V(8));
+		htonl(FW_WR_OP_V(FW_FLOWC_WR) | FW_FLOWC_WR_NPARAMS_V(nparams));
 	flowc->flowid_len16 =
-		htonl(FW_WR_LEN16_V(DIV_ROUND_UP(72, 16)) |
-				FW_WR_FLOWID_V(csk->tid));
+		htonl(FW_WR_LEN16_V(flowclen16) | FW_WR_FLOWID_V(csk->tid));
 	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
 	flowc->mnemval[0].val = htonl(csk->cdev->pfvf);
 	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
@@ -523,12 +548,10 @@
 	flowc->mnemval[7].val = htonl(csk->advmss);
 	flowc->mnemval[8].mnemonic = 0;
 	flowc->mnemval[8].val = 0;
-	for (i = 0; i < 9; i++) {
-		flowc->mnemval[i].r4[0] = 0;
-		flowc->mnemval[i].r4[1] = 0;
-		flowc->mnemval[i].r4[2] = 0;
-	}
-	set_queue(skb, CPL_PRIORITY_DATA, csk);
+	flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_TXDATAPLEN_MAX;
+	flowc->mnemval[8].val = 16384;
+
+	set_wr_txq(skb, CPL_PRIORITY_DATA, csk->port_id);
 
 	log_debug(1 << CXGBI_DBG_TOE | 1 << CXGBI_DBG_SOCK,
 		"csk 0x%p, tid 0x%x, %u,%u,%u,%u,%u,%u,%u.\n",
@@ -537,6 +560,8 @@
 		csk->advmss);
 
 	cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb);
+
+	return flowclen16;
 }
 
 static inline void make_tx_data_wr(struct cxgbi_sock *csk, struct sk_buff *skb,
@@ -545,10 +570,11 @@
 	struct fw_ofld_tx_data_wr *req;
 	unsigned int submode = cxgbi_skcb_ulp_mode(skb) & 3;
 	unsigned int wr_ulp_mode = 0, val;
+	bool imm = is_ofld_imm(skb);
 
 	req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req));
 
-	if (is_ofld_imm(skb)) {
+	if (imm) {
 		req->op_to_immdlen = htonl(FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
 					FW_WR_COMPL_F |
 					FW_WR_IMMDLEN_V(dlen));
@@ -597,16 +623,32 @@
 		int dlen = skb->len;
 		int len = skb->len;
 		unsigned int credits_needed;
+		int flowclen16 = 0;
 
 		skb_reset_transport_header(skb);
 		if (is_ofld_imm(skb))
-			credits_needed = DIV_ROUND_UP(dlen +
-					sizeof(struct fw_ofld_tx_data_wr), 16);
+			credits_needed = DIV_ROUND_UP(dlen, 16);
 		else
-			credits_needed = DIV_ROUND_UP(8*calc_tx_flits_ofld(skb)
-					+ sizeof(struct fw_ofld_tx_data_wr),
+			credits_needed = DIV_ROUND_UP(
+						8 * calc_tx_flits_ofld(skb),
+						16);
+
+		if (likely(cxgbi_skcb_test_flag(skb, SKCBF_TX_NEED_HDR)))
+			credits_needed += DIV_ROUND_UP(
+					sizeof(struct fw_ofld_tx_data_wr),
 					16);
 
+		/*
+		 * Assumes the initial credits are large enough to support
+		 * a fw_flowc_wr plus the largest possible first payload.
+		 */
+		if (!cxgbi_sock_flag(csk, CTPF_TX_DATA_SENT)) {
+			flowclen16 = send_tx_flowc_wr(csk);
+			csk->wr_cred -= flowclen16;
+			csk->wr_una_cred += flowclen16;
+			cxgbi_sock_set_flag(csk, CTPF_TX_DATA_SENT);
+		}
+
 		if (csk->wr_cred < credits_needed) {
 			log_debug(1 << CXGBI_DBG_PDU_TX,
 				"csk 0x%p, skb %u/%u, wr %d < %u.\n",
@@ -615,8 +657,8 @@
 			break;
 		}
 		__skb_unlink(skb, &csk->write_queue);
-		set_queue(skb, CPL_PRIORITY_DATA, csk);
-		skb->csum = credits_needed;
+		set_wr_txq(skb, CPL_PRIORITY_DATA, csk->port_id);
+		skb->csum = credits_needed + flowclen16;
 		csk->wr_cred -= credits_needed;
 		csk->wr_una_cred += credits_needed;
 		cxgbi_sock_enqueue_wr(csk, skb);
@@ -627,12 +669,6 @@
 			csk->wr_cred, csk->wr_una_cred);
 
 		if (likely(cxgbi_skcb_test_flag(skb, SKCBF_TX_NEED_HDR))) {
-			if (!cxgbi_sock_flag(csk, CTPF_TX_DATA_SENT)) {
-				send_tx_flowc_wr(csk);
-				skb->csum += 5;
-				csk->wr_cred -= 5;
-				csk->wr_una_cred += 5;
-			}
 			len += cxgbi_ulp_extra_len(cxgbi_skcb_ulp_mode(skb));
 			make_tx_data_wr(csk, skb, dlen, len, credits_needed,
 					req_completion);
@@ -807,6 +843,13 @@
 
 }
 
+static inline bool is_neg_adv(unsigned int status)
+{
+	return status == CPL_ERR_RTX_NEG_ADVICE ||
+		status == CPL_ERR_KEEPALV_NEG_ADVICE ||
+		status == CPL_ERR_PERSIST_NEG_ADVICE;
+}
+
 static void do_act_open_rpl(struct cxgbi_device *cdev, struct sk_buff *skb)
 {
 	struct cxgbi_sock *csk;
@@ -828,7 +871,7 @@
 		       "csk 0x%p,%u,0x%lx. ", (&csk->saddr), (&csk->daddr),
 		       atid, tid, status, csk, csk->state, csk->flags);
 
-	if (status == CPL_ERR_RTX_NEG_ADVICE)
+	if (is_neg_adv(status))
 		goto rel_skb;
 
 	module_put(THIS_MODULE);
@@ -934,8 +977,7 @@
 		       (&csk->saddr), (&csk->daddr),
 		       csk, csk->state, csk->flags, csk->tid, req->status);
 
-	if (req->status == CPL_ERR_RTX_NEG_ADVICE ||
-	    req->status == CPL_ERR_PERSIST_NEG_ADVICE)
+	if (is_neg_adv(req->status))
 		goto rel_skb;
 
 	cxgbi_sock_get(csk);
@@ -989,6 +1031,27 @@
 	__kfree_skb(skb);
 }
 
+static void do_rx_data(struct cxgbi_device *cdev, struct sk_buff *skb)
+{
+	struct cxgbi_sock *csk;
+	struct cpl_rx_data *cpl = (struct cpl_rx_data *)skb->data;
+	unsigned int tid = GET_TID(cpl);
+	struct cxgb4_lld_info *lldi = cxgbi_cdev_priv(cdev);
+	struct tid_info *t = lldi->tids;
+
+	csk = lookup_tid(t, tid);
+	if (!csk) {
+		pr_err("can't find connection for tid %u.\n", tid);
+	} else {
+		/* not expecting this, reset the connection. */
+		pr_err("csk 0x%p, tid %u, rcv cpl_rx_data.\n", csk, tid);
+		spin_lock_bh(&csk->lock);
+		send_abort_req(csk);
+		spin_unlock_bh(&csk->lock);
+	}
+	__kfree_skb(skb);
+}
+
 static void do_rx_iscsi_hdr(struct cxgbi_device *cdev, struct sk_buff *skb)
 {
 	struct cxgbi_sock *csk;
@@ -1408,6 +1471,7 @@
 	[CPL_SET_TCB_RPL] = do_set_tcb_rpl,
 	[CPL_RX_DATA_DDP] = do_rx_data_ddp,
 	[CPL_RX_ISCSI_DDP] = do_rx_data_ddp,
+	[CPL_RX_DATA] = do_rx_data,
 };
 
 int cxgb4i_ofld_init(struct cxgbi_device *cdev)
@@ -1485,7 +1549,7 @@
 		return -ENOMEM;
 	}
 	req = (struct ulp_mem_io *)skb->head;
-	set_queue(skb, CPL_PRIORITY_CONTROL, NULL);
+	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
 
 	ulp_mem_io_set_hdr(lldi, req, wr_len, dlen, pm_addr);
 	idata = (struct ulptx_idata *)(req + 1);
diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c
index 7da59c3..eb58afc 100644
--- a/drivers/scsi/cxgbi/libcxgbi.c
+++ b/drivers/scsi/cxgbi/libcxgbi.c
@@ -2294,10 +2294,12 @@
 		return err;
 	}
 
-	kfree_skb(skb);
 	log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX,
 		"itt 0x%x, skb 0x%p, len %u/%u, xmit err %d.\n",
 		task->itt, skb, skb->len, skb->data_len, err);
+
+	kfree_skb(skb);
+
 	iscsi_conn_printk(KERN_ERR, task->conn, "xmit err %d.\n", err);
 	iscsi_conn_failure(task->conn, ISCSI_ERR_XMIT_FAILED);
 	return err;
diff --git a/drivers/scsi/cxgbi/libcxgbi.h b/drivers/scsi/cxgbi/libcxgbi.h
index 2c7cb1c..aba1af7 100644
--- a/drivers/scsi/cxgbi/libcxgbi.h
+++ b/drivers/scsi/cxgbi/libcxgbi.h
@@ -317,8 +317,8 @@
 	__clear_bit(flag, &(cxgbi_skcb_flags(skb)));
 }
 
-static inline int cxgbi_skcb_test_flag(struct sk_buff *skb,
-					enum cxgbi_skcb_flags flag)
+static inline int cxgbi_skcb_test_flag(const struct sk_buff *skb,
+				       enum cxgbi_skcb_flags flag)
 {
 	return test_bit(flag, &(cxgbi_skcb_flags(skb)));
 }
diff --git a/drivers/video/fbdev/sa1100fb.c b/drivers/video/fbdev/sa1100fb.c
index 9690216..c0abe27 100644
--- a/drivers/video/fbdev/sa1100fb.c
+++ b/drivers/video/fbdev/sa1100fb.c
@@ -178,6 +178,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/mutex.h>
 #include <linux/io.h>
+#include <linux/clk.h>
 
 #include <video/sa1100fb.h>
 
@@ -416,9 +417,9 @@
 		var->transp.offset);
 
 #ifdef CONFIG_CPU_FREQ
-	dev_dbg(fbi->dev, "dma period = %d ps, clock = %d kHz\n",
+	dev_dbg(fbi->dev, "dma period = %d ps, clock = %ld kHz\n",
 		sa1100fb_display_dma_period(var),
-		cpufreq_get(smp_processor_id()));
+		clk_get_rate(fbi->clk) / 1000);
 #endif
 
 	return 0;
@@ -592,9 +593,10 @@
  * Calculate the PCD value from the clock rate (in picoseconds).
  * We take account of the PPCR clock setting.
  */
-static inline unsigned int get_pcd(unsigned int pixclock, unsigned int cpuclock)
+static inline unsigned int get_pcd(struct sa1100fb_info *fbi,
+		unsigned int pixclock)
 {
-	unsigned int pcd = cpuclock / 100;
+	unsigned int pcd = clk_get_rate(fbi->clk) / 100 / 1000;
 
 	pcd *= pixclock;
 	pcd /= 10000000;
@@ -673,7 +675,7 @@
 		LCCR2_BegFrmDel(var->upper_margin) +
 		LCCR2_EndFrmDel(var->lower_margin);
 
-	pcd = get_pcd(var->pixclock, cpufreq_get(0));
+	pcd = get_pcd(fbi, var->pixclock);
 	new_regs.lccr3 = LCCR3_PixClkDiv(pcd) | fbi->inf->lccr3 |
 		(var->sync & FB_SYNC_HOR_HIGH_ACT ? LCCR3_HorSnchH : LCCR3_HorSnchL) |
 		(var->sync & FB_SYNC_VERT_HIGH_ACT ? LCCR3_VrtSnchH : LCCR3_VrtSnchL);
@@ -787,6 +789,9 @@
 	fbi->palette_cpu[0] &= 0xcfff;
 	fbi->palette_cpu[0] |= palette_pbs(&fbi->fb.var);
 
+	/* enable LCD controller clock */
+	clk_prepare_enable(fbi->clk);
+
 	/* Sequence from 11.7.10 */
 	writel_relaxed(fbi->reg_lccr3, fbi->base + LCCR3);
 	writel_relaxed(fbi->reg_lccr2, fbi->base + LCCR2);
@@ -831,6 +836,9 @@
 
 	schedule_timeout(20 * HZ / 1000);
 	remove_wait_queue(&fbi->ctrlr_wait, &wait);
+
+	/* disable LCD controller clock */
+	clk_disable_unprepare(fbi->clk);
 }
 
 /*
@@ -1009,7 +1017,6 @@
 			 void *data)
 {
 	struct sa1100fb_info *fbi = TO_INF(nb, freq_transition);
-	struct cpufreq_freqs *f = data;
 	u_int pcd;
 
 	switch (val) {
@@ -1018,7 +1025,7 @@
 		break;
 
 	case CPUFREQ_POSTCHANGE:
-		pcd = get_pcd(fbi->fb.var.pixclock, f->new);
+		pcd = get_pcd(fbi, fbi->fb.var.pixclock);
 		fbi->reg_lccr3 = (fbi->reg_lccr3 & ~0xff) | LCCR3_PixClkDiv(pcd);
 		set_ctrlr_state(fbi, C_ENABLE_CLKCHANGE);
 		break;
@@ -1225,6 +1232,13 @@
 	if (!fbi)
 		goto failed;
 
+	fbi->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(fbi->clk)) {
+		ret = PTR_ERR(fbi->clk);
+		fbi->clk = NULL;
+		goto failed;
+	}
+
 	fbi->base = ioremap(res->start, resource_size(res));
 	if (!fbi->base)
 		goto failed;
@@ -1277,6 +1291,8 @@
  failed:
 	if (fbi)
 		iounmap(fbi->base);
+	if (fbi && fbi->clk)
+		clk_put(fbi->clk);
 	kfree(fbi);
 	release_mem_region(res->start, resource_size(res));
 	return ret;
diff --git a/drivers/video/fbdev/sa1100fb.h b/drivers/video/fbdev/sa1100fb.h
index fc5d429..0139d13 100644
--- a/drivers/video/fbdev/sa1100fb.h
+++ b/drivers/video/fbdev/sa1100fb.h
@@ -68,6 +68,7 @@
 #endif
 
 	const struct sa1100fb_mach_info *inf;
+	struct clk *clk;
 };
 
 #define TO_INF(ptr,member)	container_of(ptr,struct sa1100fb_info,member)
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 1402fa8..f5c40b0f 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -42,6 +42,14 @@
 #define wmb()	mb()
 #endif
 
+#ifndef dma_rmb
+#define dma_rmb()	rmb()
+#endif
+
+#ifndef dma_wmb
+#define dma_wmb()	wmb()
+#endif
+
 #ifndef read_barrier_depends
 #define read_barrier_depends()		do { } while (0)
 #endif
diff --git a/include/dt-bindings/dma/at91.h b/include/dt-bindings/dma/at91.h
index e835037..ab6cbba 100644
--- a/include/dt-bindings/dma/at91.h
+++ b/include/dt-bindings/dma/at91.h
@@ -9,6 +9,8 @@
 #ifndef __DT_BINDINGS_AT91_DMA_H__
 #define __DT_BINDINGS_AT91_DMA_H__
 
+/* ---------- HDMAC ---------- */
+
 /*
  * Source and/or destination peripheral ID
  */
@@ -24,4 +26,27 @@
 #define AT91_DMA_CFG_FIFOCFG_ALAP	(0x1 << AT91_DMA_CFG_FIFOCFG_OFFSET)	/* largest defined AHB burst */
 #define AT91_DMA_CFG_FIFOCFG_ASAP	(0x2 << AT91_DMA_CFG_FIFOCFG_OFFSET)	/* single AHB access */
 
+
+/* ---------- XDMAC ---------- */
+#define AT91_XDMAC_DT_MEM_IF_MASK	(0x1)
+#define AT91_XDMAC_DT_MEM_IF_OFFSET	(13)
+#define AT91_XDMAC_DT_MEM_IF(mem_if)	(((mem_if) & AT91_XDMAC_DT_MEM_IF_MASK) \
+					<< AT91_XDMAC_DT_MEM_IF_OFFSET)
+#define AT91_XDMAC_DT_GET_MEM_IF(cfg)	(((cfg) >> AT91_XDMAC_DT_MEM_IF_OFFSET) \
+					& AT91_XDMAC_DT_MEM_IF_MASK)
+
+#define AT91_XDMAC_DT_PER_IF_MASK	(0x1)
+#define AT91_XDMAC_DT_PER_IF_OFFSET	(14)
+#define AT91_XDMAC_DT_PER_IF(per_if)	(((per_if) & AT91_XDMAC_DT_PER_IF_MASK) \
+					<< AT91_XDMAC_DT_PER_IF_OFFSET)
+#define AT91_XDMAC_DT_GET_PER_IF(cfg)	(((cfg) >> AT91_XDMAC_DT_PER_IF_OFFSET) \
+					& AT91_XDMAC_DT_PER_IF_MASK)
+
+#define AT91_XDMAC_DT_PERID_MASK	(0x7f)
+#define AT91_XDMAC_DT_PERID_OFFSET	(24)
+#define AT91_XDMAC_DT_PERID(perid)	(((perid) & AT91_XDMAC_DT_PERID_MASK) \
+					<< AT91_XDMAC_DT_PERID_OFFSET)
+#define AT91_XDMAC_DT_GET_PERID(cfg)	(((cfg) >> AT91_XDMAC_DT_PERID_OFFSET) \
+					& AT91_XDMAC_DT_PERID_MASK)
+
 #endif /* __DT_BINDINGS_AT91_DMA_H__ */
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index c324f57..ac02f9b 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -97,6 +97,16 @@
 #define amba_pclk_disable(d)	\
 	do { if (!IS_ERR((d)->pclk)) clk_disable((d)->pclk); } while (0)
 
+static inline int amba_pclk_prepare(struct amba_device *dev)
+{
+	return clk_prepare(dev->pclk);
+}
+
+static inline void amba_pclk_unprepare(struct amba_device *dev)
+{
+	clk_unprepare(dev->pclk);
+}
+
 /* Some drivers don't use the struct amba_device */
 #define AMBA_CONFIG_BITS(a) (((a) >> 24) & 0xff)
 #define AMBA_REV_BITS(a) (((a) >> 20) & 0x0f)
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 653a1fd..40cd75e 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -447,7 +447,8 @@
  * 	communicate status
  * @phys: physical address of the descriptor
  * @chan: target channel for this operation
- * @tx_submit: set the prepared descriptor(s) to be executed by the engine
+ * @tx_submit: accept the descriptor, assign an ordered cookie and mark the
+ * descriptor pending; it is pushed to the hardware on the .issue_pending() call
  * @callback: routine to call after this operation is complete
  * @callback_param: general parameter to pass to the callback routine
  * ---async_tx api specific fields---
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 593fff9..3062495 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -30,6 +30,12 @@
 
 struct acpi_dmar_header;
 
+#ifdef	CONFIG_X86
+# define	DMAR_UNITS_SUPPORTED	MAX_IO_APICS
+#else
+# define	DMAR_UNITS_SUPPORTED	64
+#endif
+
 /* DMAR Flags */
 #define DMAR_INTR_REMAP		0x1
 #define DMAR_X2APIC_OPT_OUT	0x2
@@ -120,29 +126,61 @@
 /* Intel IOMMU detection */
 extern int detect_intel_iommu(void);
 extern int enable_drhd_fault_handling(void);
+extern int dmar_device_add(acpi_handle handle);
+extern int dmar_device_remove(acpi_handle handle);
+
+static inline int dmar_res_noop(struct acpi_dmar_header *hdr, void *arg)
+{
+	return 0;
+}
 
 #ifdef CONFIG_INTEL_IOMMU
 extern int iommu_detected, no_iommu;
 extern int intel_iommu_init(void);
-extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header);
-extern int dmar_parse_one_atsr(struct acpi_dmar_header *header);
+extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg);
+extern int dmar_parse_one_atsr(struct acpi_dmar_header *header, void *arg);
+extern int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg);
+extern int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg);
+extern int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert);
 extern int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info);
 #else /* !CONFIG_INTEL_IOMMU: */
 static inline int intel_iommu_init(void) { return -ENODEV; }
-static inline int dmar_parse_one_rmrr(struct acpi_dmar_header *header)
-{
-	return 0;
-}
-static inline int dmar_parse_one_atsr(struct acpi_dmar_header *header)
-{
-	return 0;
-}
+
+#define	dmar_parse_one_rmrr		dmar_res_noop
+#define	dmar_parse_one_atsr		dmar_res_noop
+#define	dmar_check_one_atsr		dmar_res_noop
+#define	dmar_release_one_atsr		dmar_res_noop
+
 static inline int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
 {
 	return 0;
 }
+
+static inline int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
+{
+	return 0;
+}
 #endif /* CONFIG_INTEL_IOMMU */
 
+#ifdef CONFIG_IRQ_REMAP
+extern int dmar_ir_hotplug(struct dmar_drhd_unit *dmaru, bool insert);
+#else  /* CONFIG_IRQ_REMAP */
+static inline int dmar_ir_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
+{ return 0; }
+#endif /* CONFIG_IRQ_REMAP */
+
+#else /* CONFIG_DMAR_TABLE */
+
+static inline int dmar_device_add(void *handle)
+{
+	return 0;
+}
+
+static inline int dmar_device_remove(void *handle)
+{
+	return 0;
+}
+
 #endif /* CONFIG_DMAR_TABLE */
 
 struct irte {
diff --git a/include/linux/fence.h b/include/linux/fence.h
index d174585..39efee1 100644
--- a/include/linux/fence.h
+++ b/include/linux/fence.h
@@ -128,8 +128,8 @@
  * from irq context, so normal spinlocks can be used.
  *
  * A return value of false indicates the fence already passed,
- * or some failure occured that made it impossible to enable
- * signaling. True indicates succesful enabling.
+ * or some failure occurred that made it impossible to enable
+ * signaling. True indicates successful enabling.
  *
  * fence->status may be set in enable_signaling, but only when false is
  * returned.
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index b556e0a..70ee0d3 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -359,7 +359,7 @@
  * to name two of the most common.
  *
  * The return codes from the @master_xfer field should indicate the type of
- * error code that occured during the transfer, as documented in the kernel
+ * error code that occurred during the transfer, as documented in the kernel
  * Documentation file Documentation/i2c/fault-codes.
  */
 struct i2c_algorithm {
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 69517a2..d9b05b5 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -556,12 +556,6 @@
 	atomic_dec(&t->count);
 }
 
-static inline void tasklet_hi_enable(struct tasklet_struct *t)
-{
-	smp_mb__before_atomic();
-	atomic_dec(&t->count);
-}
-
 extern void tasklet_kill(struct tasklet_struct *t);
 extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu);
 extern void tasklet_init(struct tasklet_struct *t,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index b29a598..7a7bd15 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -28,7 +28,7 @@
 #define IOMMU_READ	(1 << 0)
 #define IOMMU_WRITE	(1 << 1)
 #define IOMMU_CACHE	(1 << 2) /* DMA cache coherency */
-#define IOMMU_EXEC	(1 << 3)
+#define IOMMU_NOEXEC	(1 << 3)
 
 struct iommu_ops;
 struct iommu_group;
@@ -62,6 +62,7 @@
 	IOMMU_CAP_CACHE_COHERENCY,	/* IOMMU can enforce cache coherent DMA
 					   transactions */
 	IOMMU_CAP_INTR_REMAP,		/* IOMMU supports interrupt isolation */
+	IOMMU_CAP_NOEXEC,		/* IOMMU_NOEXEC flag */
 };
 
 /*
diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h
index 76d2acb..838dbfa 100644
--- a/include/linux/ipmi.h
+++ b/include/linux/ipmi.h
@@ -37,6 +37,7 @@
 
 #include <linux/list.h>
 #include <linux/proc_fs.h>
+#include <linux/acpi.h> /* For acpi_handle */
 
 struct module;
 struct device;
@@ -278,15 +279,18 @@
 	SI_INVALID = 0, SI_HOTMOD, SI_HARDCODED, SI_SPMI, SI_ACPI, SI_SMBIOS,
 	SI_PCI,	SI_DEVICETREE, SI_DEFAULT
 };
+const char *ipmi_addr_src_to_str(enum ipmi_addr_src src);
 
 union ipmi_smi_info_union {
+#ifdef CONFIG_ACPI
 	/*
 	 * the acpi_info element is defined for the SI_ACPI
 	 * address type
 	 */
 	struct {
-		void *acpi_handle;
+		acpi_handle acpi_handle;
 	} acpi_info;
+#endif
 };
 
 struct ipmi_smi_info {
diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h
index bd34924..0b1e569 100644
--- a/include/linux/ipmi_smi.h
+++ b/include/linux/ipmi_smi.h
@@ -98,12 +98,11 @@
 	   operation is not allowed to fail.  If an error occurs, it
 	   should report back the error in a received message.  It may
 	   do this in the current call context, since no write locks
-	   are held when this is run.  If the priority is > 0, the
-	   message will go into a high-priority queue and be sent
-	   first.  Otherwise, it goes into a normal-priority queue. */
+	   are held when this is run.  Messages are delivered one at
+	   a time by the message handler; a new message will not be
+	   delivered until the previous message is returned. */
 	void (*sender)(void                *send_info,
-		       struct ipmi_smi_msg *msg,
-		       int                 priority);
+		       struct ipmi_smi_msg *msg);
 
 	/* Called by the upper layer to request that we try to get
 	   events from the BMC we are attached to. */
@@ -212,7 +211,6 @@
 		      void                     *send_info,
 		      struct ipmi_device_id    *device_id,
 		      struct device            *dev,
-		      const char               *sysfs_name,
 		      unsigned char            slave_addr);
 
 /*
diff --git a/include/linux/platform_data/dma-imx.h b/include/linux/platform_data/dma-imx.h
index 6a1357d..7d964e7 100644
--- a/include/linux/platform_data/dma-imx.h
+++ b/include/linux/platform_data/dma-imx.h
@@ -41,6 +41,7 @@
 	IMX_DMATYPE_ESAI,	/* ESAI */
 	IMX_DMATYPE_SSI_DUAL,	/* SSI Dual FIFO */
 	IMX_DMATYPE_ASRC_SP,	/* Shared ASRC */
+	IMX_DMATYPE_SAI,	/* SAI */
 };
 
 enum imx_dma_prio {
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index eda4fee..30e84d4 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -118,6 +118,11 @@
 	ACCESS_ONCE(dev->power.last_busy) = jiffies;
 }
 
+static inline bool pm_runtime_is_irq_safe(struct device *dev)
+{
+	return dev->power.irq_safe;
+}
+
 #else /* !CONFIG_PM */
 
 static inline bool queue_pm_work(struct work_struct *work) { return false; }
@@ -164,6 +169,7 @@
 
 static inline void pm_runtime_no_callbacks(struct device *dev) {}
 static inline void pm_runtime_irq_safe(struct device *dev) {}
+static inline bool pm_runtime_is_irq_safe(struct device *dev) { return false; }
 
 static inline bool pm_runtime_callbacks_present(struct device *dev) { return false; }
 static inline void pm_runtime_mark_last_busy(struct device *dev) {}
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 3768050..1189564 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -579,11 +579,12 @@
 		      (1<<__LINK_STATE_PRESENT);
 
 	dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG |
-			   NETIF_F_FRAGLIST | NETIF_F_ALL_TSO |
+			   NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE |
 			   NETIF_F_HIGHDMA | NETIF_F_SCTP_CSUM |
 			   NETIF_F_ALL_FCOE;
 
-	dev->features |= real_dev->vlan_features | NETIF_F_LLTX;
+	dev->features |= real_dev->vlan_features | NETIF_F_LLTX |
+			 NETIF_F_GSO_SOFTWARE;
 	dev->gso_max_size = real_dev->gso_max_size;
 	if (dev->features & NETIF_F_VLAN_FEATURES)
 		netdev_warn(real_dev, "VLAN features are set incorrectly.  Q-in-Q configurations may not work correctly.\n");
@@ -648,7 +649,7 @@
 	features |= NETIF_F_RXCSUM;
 	features = netdev_intersect_features(features, real_dev->features);
 
-	features |= old_features & NETIF_F_SOFT_FEATURES;
+	features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_GSO_SOFTWARE);
 	features |= NETIF_F_LLTX;
 
 	return features;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 528380a..515569f 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -512,7 +512,7 @@
 }
 
 /* slave device setup *******************************************************/
-static void dsa_slave_phy_setup(struct dsa_slave_priv *p,
+static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
 				struct net_device *slave_dev)
 {
 	struct dsa_switch *ds = p->parent;
@@ -533,7 +533,7 @@
 		ret = of_phy_register_fixed_link(port_dn);
 		if (ret) {
 			netdev_err(slave_dev, "failed to register fixed PHY\n");
-			return;
+			return ret;
 		}
 		phy_is_fixed = true;
 		phy_dn = port_dn;
@@ -555,12 +555,17 @@
 	 */
 	if (!p->phy) {
 		p->phy = ds->slave_mii_bus->phy_map[p->port];
+		if (!p->phy)
+			return -ENODEV;
+
 		phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
 				   p->phy_interface);
 	} else {
 		netdev_info(slave_dev, "attached PHY at address %d [%s]\n",
 			    p->phy->addr, p->phy->drv->name);
 	}
+
+	return 0;
 }
 
 int dsa_slave_suspend(struct net_device *slave_dev)
@@ -653,12 +658,17 @@
 	p->old_link = -1;
 	p->old_duplex = -1;
 
-	dsa_slave_phy_setup(p, slave_dev);
+	ret = dsa_slave_phy_setup(p, slave_dev);
+	if (ret) {
+		free_netdev(slave_dev);
+		return NULL;
+	}
 
 	ret = register_netdev(slave_dev);
 	if (ret) {
 		netdev_err(master, "error %d registering interface %s\n",
 			   ret, slave_dev->name);
+		phy_disconnect(p->phy);
 		free_netdev(slave_dev);
 		return NULL;
 	}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e9cb258..18bcaf2 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1143,8 +1143,9 @@
 			put_child(tp, cindex, (struct rt_trie_node *)tn);
 		} else {
 			rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
-			tp = tn;
 		}
+
+		tp = tn;
 	}
 
 	if (tp && tp->pos + tp->bits > 32)